美文网首页
tensorRT win10 VS2019 cmake 环境部署

tensorRT win10 VS2019 cmake 环境部署

作者: dillqq | 来源:发表于2023-05-15 22:36 被阅读0次

TensorRT环境配置

TensorRT环境配置可以参照:
https://blog.csdn.net/caobin_cumt/article/details/125579033

https://blog.csdn.net/qq_28912651/article/details/128921341
配置TensorRT之前需要先配置CUDA环境。本文不详细讲解TensorRT的环境配置,网上相关资料很多,可以参照上述链接进行编译配置。

cmake配置

# Project-specific logic for the CMakeProject1 executable.
#
cmake_minimum_required (VERSION 3.8)

# OpenCV: used by the program for image loading and resizing.
find_package(OpenCV REQUIRED)
# Add the OpenCV headers.
include_directories(${OpenCV_INCLUDE_DIRS})


# CUDA
find_package(CUDA REQUIRED)
include_directories(${CUDA_INCLUDE_DIRS})
# Take the import-library directory from the toolkit that find_package(CUDA)
# located instead of pinning one CUDA version's install path. Quoted because
# the default install location contains spaces.
link_directories("${CUDA_TOOLKIT_ROOT_DIR}/lib/x64")


# TensorRT
# TENSORRT_ROOT, TENSORRT_INCLUDE_DIR and TENSORRT_LIBRARY_DIR may be supplied
# with -D<name>=<path>; otherwise they default to the layout of the TensorRT
# Windows package referenced by this project.
set(TENSORRT_ROOT "D:/tensorRT/TensorRT-8.2.1.8" CACHE PATH "TensorRT installation root")
if(NOT TENSORRT_INCLUDE_DIR)
  set(TENSORRT_INCLUDE_DIR "${TENSORRT_ROOT}/include")
endif()
if(NOT TENSORRT_LIBRARY_DIR)
  set(TENSORRT_LIBRARY_DIR "${TENSORRT_ROOT}/lib")
endif()
include_directories(${TENSORRT_INCLUDE_DIR})
# Helper headers shipped with the official TensorRT samples
# (argsParser.h, buffers.h, common.h, logger.h, parserOnnxConfig.h).
include_directories("${TENSORRT_ROOT}/samples/common")
link_directories(${TENSORRT_LIBRARY_DIR})


# Add the sources to this project's executable.
add_executable (CMakeProject1 "CMakeProject1.cpp" "CMakeProject1.h")

target_link_libraries(CMakeProject1 ${OpenCV_LIBS})
target_link_libraries(CMakeProject1 nvinfer)
target_link_libraries(CMakeProject1 cudart)

# TODO: add tests and install targets if needed.

模型运行

使用CRNN模型进行验证(也可以使用samples文件夹下的示例项目进行快速验证),代码如下:

// CMakeProject1.cpp: 定义应用程序的入口点。
//



#include "CMakeProject1.h"

#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"
#include "parserOnnxConfig.h"

#include "NvInfer.h"

#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2\imgproc\imgproc.hpp>

#include <cuda_runtime_api.h>
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

using namespace std;
using namespace cv;




// Number of images handed to the engine per inference call.
static constexpr int BATCH_SIZE = 1;
// Height and width (in pixels) used when sizing the network input buffer.
static constexpr int INPUT_H = 32;
static constexpr int INPUT_W = 320;
// Number of floats the network produces per image; main() reads it as
// 80 rows of 27 scores.
static constexpr int OUTPUT_SIZE = 80 * 27;
// Binding names looked up on the deserialized engine.
const char* INPUT_BLOB_NAME = "input";
const char* OUTPUT_BLOB_NAME = "output";
// Characters indexed by predicted class in strDecode().
// NOTE(review): this string has 26 characters while main() scores 27 classes
// per step — confirm the class-to-character mapping against the trained model.
const std::string alphabet = "0123456789ABCJNORSTVYbcmy_";

// Logger handed to createInferRuntime(): prints a message to stdout when its
// severity compares <= Severity::kWARNING and drops everything else
// (i.e. info-level messages are suppressed).
class Logger : public ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        const bool suppressed = severity > Severity::kWARNING;
        if (suppressed)
        {
            return;
        }
        std::cout << msg << std::endl;
    }
} logger;


// Runs one inference on `stream`: copies `input` to buffers[0], enqueues the
// execution context, copies buffers[1] back into `output`, then waits for the
// stream to finish.
//
// context   - execution context to enqueue.
// stream    - CUDA stream all work is issued on.
// buffers   - device bindings; buffers[0] is treated as the input and
//             buffers[1] as the output.
// input     - host source of batchSize * 1 * INPUT_H * INPUT_W floats.
// output    - host destination for batchSize * OUTPUT_SIZE floats.
// batchSize - number of images in the batch.
//
// Returns true only if both copies, the enqueue and the final stream
// synchronisation all succeeded.
bool doInference(IExecutionContext& context, cudaStream_t& stream, void** buffers, float* input, float* output, int batchSize) {
    const size_t inputBytes = static_cast<size_t>(batchSize) * 1 * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = static_cast<size_t>(batchSize) * OUTPUT_SIZE * sizeof(float);

    // Each step is only issued if every previous one was accepted.
    bool ok = cudaMemcpyAsync(buffers[0], input, inputBytes, cudaMemcpyHostToDevice, stream) == cudaSuccess;

    // NOTE(review): enqueue() takes an explicit batch size; if the engine was
    // built with an explicit batch dimension, enqueueV2() is presumably the
    // intended entry point — confirm against how the engine was generated.
    ok = ok && context.enqueue(batchSize, buffers, stream, nullptr);

    ok = ok && cudaMemcpyAsync(output, buffers[1], outputBytes, cudaMemcpyDeviceToHost, stream) == cudaSuccess;

    // Always drain the stream, even after a failure, so no queued copy is
    // still touching the host buffers once control returns to the caller.
    const bool drained = cudaStreamSynchronize(stream) == cudaSuccess;

    return ok && drained;
}


// Turns a sequence of per-step class indices into text using `alphabet`.
//
// preds - predicted class index for each step.
// raw   - true: emit one character per step;
//         false: skip steps whose index is 0 and steps that repeat the
//         previous step's index.
//
// Indices that `alphabet` does not cover (negative, or >= alphabet.size())
// are rendered as '?' instead of reading past the end of the string.
std::string strDecode(std::vector<int>& preds, bool raw) {
    // Bounds-checked lookup of the character for one class index.
    const auto symbolFor = [](int idx) -> char {
        const bool known = idx >= 0 && static_cast<size_t>(idx) < alphabet.size();
        return known ? alphabet[static_cast<size_t>(idx)] : '?';
    };

    std::string str;
    str.reserve(preds.size());
    for (size_t i = 0; i < preds.size(); i++) {
        if (!raw) {
            const bool isBlank = preds[i] == 0;
            const bool isRepeat = i > 0 && preds[i - 1] == preds[i];
            if (isBlank || isRepeat) continue;
        }
        str.push_back(symbolFor(preds[i]));
    }
    return str;
}





int main()
{
    //图片加载
    Mat image = imread("D:/tensorRT/TensorRT-8.2.1.8/samples/Project1/IMG100---101A500201104078.png");
    if (image.empty()) {
        return 0;
    }
    size_t size{ 0 };
    char* trtModelStream{ nullptr };
    //读取权重
    std::ifstream file("D:/tensorRT/TensorRT-8.2.1.8/ONNX/conx.engine", std::ios::binary);
    if (!file.good()) {
        std::cout << "Wrong" << std::endl;
        return 0;
    }

    if (file.good()) {
        file.seekg(0, file.end);
        size = file.tellg();
        file.seekg(0, file.beg);
        trtModelStream = new char[size];
        file.read(trtModelStream, size);
        assert(trtModelStream);
        file.close();
    }
    //创建日志文件
    IRuntime* runtime = createInferRuntime(logger);
    assert(runtime != nullptr);
    //创建环境,上下文
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    delete[] trtModelStream;
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    if (!context)
    {
        std::cout << "context Wrong" << std::endl;
        return false;
    }
    assert(engine->getNbBindings() == 2);
    void* buffers[2];
    // prepare input data 
    static float data[BATCH_SIZE * 3 * INPUT_H * INPUT_W];
    // prepare output data;
    static float prob[BATCH_SIZE * OUTPUT_SIZE];

    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME);
    assert(inputIndex == 0);
    assert(outputIndex == 1);

    // Create GPU buffers on device 
    auto ret =cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 1 * INPUT_H * INPUT_W * sizeof(float));
    if (ret != 0) {
        std::cout << "inputIndex Wrong" << std::endl;
    }
    ret = cudaMalloc(&buffers[outputIndex], BATCH_SIZE * OUTPUT_SIZE * sizeof(float));
    if (ret != 0) {
        std::cout << "inputIndex Wrong" << std::endl;
    }
    // Create stream
    cudaStream_t stream;
    ret = cudaStreamCreate(&stream);
    if (ret != 0) {
        std::cout << "inputIndex Wrong" << std::endl;
    }


    Mat imageChange;
    resize(image, imageChange, Size(320, 32), 0, 0, INTER_LINEAR);
    cout << imageChange.cols << endl;
    cout << imageChange.rows << endl;
    cout << imageChange.channels() << endl;
    for (int i = 0; i < INPUT_H * INPUT_W; i++) {
        for (int j = 0; j <= 2; j++) {
            //cout << (float)imageChange.at<Vec3b>(i)[j] << endl;
            data[i] = ((float)imageChange.at<Vec3b>(i)[j] / 255.0 - 0.5) * 2.0;
        }
    }


    // Run inference
    auto start = std::chrono::system_clock::now();
    if (!doInference(*context, stream, buffers, data, prob, BATCH_SIZE)) {
        std::cout << "doInference Wrong" << std::endl;
        return 0;
    }

    auto end = std::chrono::system_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
    std::vector<int> preds;
    for (int i = 0; i < 80; i++) {
        int maxj = 0;
        for (int j = 1; j < 27; j++) {
            if (prob[27 * i + j] > prob[27 * i + maxj]) maxj = j;
        }
        preds.push_back(maxj);
    }
    std::cout << "raw: " << strDecode(preds, true) << std::endl;
    std::cout << "sim: " << strDecode(preds, false) << std::endl;
    return 0;
}

相关文章

网友评论

      本文标题:tensorRT win10 VS2019 cmake 环境部署

      本文链接:https://www.haomeiwen.com/subject/wksvsdtx.html