TensorRT环境配置
TensorRT的环境配置可以参照：
https://blog.csdn.net/caobin_cumt/article/details/125579033
和
https://blog.csdn.net/qq_28912651/article/details/128921341
配置TensorRT之前需要先配置好CUDA环境。本文的重点不是TensorRT的环境搭建，这方面网上的资料很多，可以参照网上的教程进行编译和配置。
cmake配置
# Project-specific build logic for the CMakeProject1 executable.
#
cmake_minimum_required (VERSION 3.8)

# OpenCV: used for image loading and resizing.
find_package(OpenCV REQUIRED)
# Add the OpenCV headers to the include path.
include_directories(${OpenCV_INCLUDE_DIRS})

# CUDA runtime: headers plus the directory that holds the cudart library.
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
# Use the toolkit that find_package(CUDA) located instead of a fixed
# v11.3 install path, so headers and libraries always come from the same
# toolkit version.
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib/x64")

# TensorRT. The three variables below can be overridden from the command line
# (-DTENSORRT_ROOT=..., -DTENSORRT_INCLUDE_DIR=..., -DTENSORRT_LIBRARY_DIR=...)
# or by a parent CMakeLists; the defaults match the install location this
# project was written against.
if(NOT TENSORRT_ROOT)
  set(TENSORRT_ROOT "D:/tensorRT/TensorRT-8.2.1.8")
endif()
if(NOT TENSORRT_INCLUDE_DIR)
  set(TENSORRT_INCLUDE_DIR "${TENSORRT_ROOT}/include")
endif()
if(NOT TENSORRT_LIBRARY_DIR)
  set(TENSORRT_LIBRARY_DIR "${TENSORRT_ROOT}/lib")
endif()
include_directories(${TENSORRT_INCLUDE_DIR})
# Helper headers shipped with the official TensorRT samples
# (argsParser.h, buffers.h, common.h, logger.h, parserOnnxConfig.h).
include_directories("${TENSORRT_ROOT}/samples/common")
link_directories(${TENSORRT_LIBRARY_DIR})

# Add the sources to this project's executable.
add_executable (CMakeProject1 "CMakeProject1.cpp" "CMakeProject1.h")
target_link_libraries(CMakeProject1 ${OpenCV_LIBS})
target_link_libraries(CMakeProject1 nvinfer)
target_link_libraries(CMakeProject1 cudart)
# TODO: add tests and install targets if needed.
模型运行
这里使用CRNN模型进行验证，当然也可以使用samples目录下的示例项目进行快速验证。代码如下：
// CMakeProject1.cpp: 定义应用程序的入口点。
//
#include "CMakeProject1.h"
#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"
#include "parserOnnxConfig.h"
#include "NvInfer.h"
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2\imgproc\imgproc.hpp>
#include <cuda_runtime_api.h>
#include <chrono>
#include <map>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
using namespace std;
using namespace cv;
// Number of images processed per inference call.
static constexpr int BATCH_SIZE = 1;
// Height and width, in pixels, that the input image is resized to.
static constexpr int INPUT_H = 32;
static constexpr int INPUT_W = 320;
// Floats produced per image. main() reads them as 80 steps of 27 scores each.
static constexpr int OUTPUT_SIZE = 80 * 27;
// Binding names looked up in the deserialized engine.
const char* INPUT_BLOB_NAME = "input";
const char* OUTPUT_BLOB_NAME = "output";
// Lookup table from predicted class index to output character.
// NOTE(review): this holds 26 characters while main() scans 27 classes per
// step, and strDecode() treats class 0 as "skip" although alphabet[0] is '0'.
// Confirm the class-to-character mapping against the trained model.
const std::string alphabet{ "0123456789ABCJNORSTVYbcmy_" };
// Minimal TensorRT logger: messages with severity kWARNING or more severe are
// written to stdout, everything less severe is dropped. A single global
// instance, `logger`, is defined together with the class.
class Logger : public ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        // Skip anything that compares above kWARNING (the info-level
        // messages this logger is meant to suppress).
        if (severity > Severity::kWARNING)
        {
            return;
        }
        std::cout << msg << std::endl;
    }
} logger;
// Runs one inference pass on `stream` and waits for it to finish.
//
// Steps, all issued on `stream`:
//   1. copy batchSize * INPUT_H * INPUT_W floats from `input` to buffers[0];
//   2. enqueue the execution context;
//   3. copy batchSize * OUTPUT_SIZE floats from buffers[1] to `output`;
//   4. block until the stream has drained.
//
// Parameters:
//   context   - execution context to run.
//   stream    - CUDA stream the copies and the inference are queued on.
//   buffers   - device pointers; index 0 is the input, index 1 the output.
//   input     - host source buffer.
//   output    - host destination buffer.
//   batchSize - number of samples in the batch.
//
// Returns true only if every step succeeded. When false is returned the
// contents of `output` must not be used.
bool doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* input, float* output, int batchSize) {
    const size_t inputBytes = static_cast<size_t>(batchSize) * 1 * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * OUTPUT_SIZE * sizeof(float);

    // Host -> device copy of the input batch.
    if (cudaMemcpyAsync(buffers[0], input, inputBytes, cudaMemcpyHostToDevice, stream) != cudaSuccess) {
        return false;
    }

    // enqueue() reports failure through its bool result; without this check a
    // failed launch would go unnoticed and stale device memory would be
    // copied back as if it were a valid result.
    // NOTE(review): enqueue() is the implicit-batch entry point. If the engine
    // was built with an explicit batch dimension (typical for ONNX-parsed
    // networks), enqueueV2() is the matching call — confirm against the engine.
    if (!context.enqueue(batchSize, buffers, stream, nullptr)) {
        return false;
    }

    // Device -> host copy of the network output.
    if (cudaMemcpyAsync(output, buffers[1], outputBytes, cudaMemcpyDeviceToHost, stream) != cudaSuccess) {
        return false;
    }

    // Errors from the asynchronous work above surface here, so the result of
    // the synchronization decides the overall outcome.
    return cudaStreamSynchronize(stream) == cudaSuccess;
}
// Converts a sequence of predicted class indices into text via `alphabet`.
//
// Parameters:
//   preds - one class index per output step.
//   raw   - true:  emit exactly one character per entry of `preds`.
//           false: drop entries equal to 0 and entries equal to their
//                  immediate predecessor, then emit the remaining ones.
//
// Returns the decoded string. An index outside [0, alphabet.size()) is
// rendered as '?' rather than being used to index the alphabet: the network
// can produce more classes than the alphabet has characters, and indexing
// with such a value would yield a NUL character or read out of bounds.
std::string strDecode(std::vector<int>& preds, bool raw) {
    // Bounds-checked lookup of the character for one class index.
    const auto charFor = [](int classIndex) -> char {
        const bool inRange = classIndex >= 0 &&
            static_cast<size_t>(classIndex) < alphabet.size();
        return inRange ? alphabet[static_cast<size_t>(classIndex)] : '?';
    };

    std::string str;
    str.reserve(preds.size());
    for (size_t i = 0; i < preds.size(); i++) {
        if (!raw) {
            const bool isZeroClass = preds[i] == 0;
            const bool repeatsPrevious = i > 0 && preds[i - 1] == preds[i];
            if (isZeroClass || repeatsPrevious) continue;
        }
        str.push_back(charFor(preds[i]));
    }
    return str;
}
int main()
{
//图片加载
Mat image = imread("D:/tensorRT/TensorRT-8.2.1.8/samples/Project1/IMG100---101A500201104078.png");
if (image.empty()) {
return 0;
}
size_t size{ 0 };
char* trtModelStream{ nullptr };
//读取权重
std::ifstream file("D:/tensorRT/TensorRT-8.2.1.8/ONNX/conx.engine", std::ios::binary);
if (!file.good()) {
std::cout << "Wrong" << std::endl;
return 0;
}
if (file.good()) {
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
file.read(trtModelStream, size);
assert(trtModelStream);
file.close();
}
//创建日志文件
IRuntime* runtime = createInferRuntime(logger);
assert(runtime != nullptr);
//创建环境,上下文
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
delete[] trtModelStream;
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
if (!context)
{
std::cout << "context Wrong" << std::endl;
return false;
}
assert(engine->getNbBindings() == 2);
void* buffers[2];
// prepare input data
static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
// prepare output data;
static float prob[BATCH_SIZE * OUTPUT_SIZE];
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
assert(inputIndex == 0);
assert(outputIndex == 1);
// Create GPU buffers on device
auto ret =cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 1 * INPUT_H * INPUT_W * sizeof(float));
if (ret != 0) {
std::cout << "inputIndex Wrong" << std::endl;
}
ret = cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float));
if (ret != 0) {
std::cout << "inputIndex Wrong" << std::endl;
}
// Create stream
cudaStream_t stream;
ret = cudaStreamCreate(&stream);
if (ret != 0) {
std::cout << "inputIndex Wrong" << std::endl;
}
Mat imageChange;
resize(image, imageChange, Size(320, 32), 0, 0, INTER_LINEAR);
cout << imageChange.cols << endl;
cout << imageChange.rows << endl;
cout << imageChange.channels() << endl;
for (int i = 0; i < INPUT_H * INPUT_W; i++) {
for (int j = 0; j <= 2; j++) {
//cout << (float)imageChange.at<Vec3b>(i)[j] << endl;
data[i] = ((float)imageChange.at<Vec3b>(i)[j] / 255.0 - 0.5) * 2.0;
}
}
// Run inference
auto start = std::chrono::system_clock::now();
if (!doInference(*context, stream, buffers, data, prob, BATCH_SIZE)) {
std::cout << "doInference Wrong" << std::endl;
return 0;
}
auto end = std::chrono::system_clock::now();
std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
std::vector<int> preds;
for (int i = 0; i < 80; i++) {
int maxj = 0;
for (int j = 1; j < 27; j++) {
if (prob[27 * i + j] > prob[27 * i + maxj]) maxj = j;
}
preds.push_back(maxj);
}
std::cout << "raw: " << strDecode(preds, true) << std::endl;
std::cout << "sim: " << strDecode(preds, false) << std::endl;
return 0;
}
网友评论