A C++ example of running Paddle Lite on the x86 platform

Author: book_02 | Published 2023-12-19 18:05

    This article runs a slightly modified version of an example from the official Paddle-Lite-Demo repository.

    The example was run on Ubuntu.

    1. Download Paddle-Lite-Demo

    git clone https://github.com/PaddlePaddle/Paddle-Lite-Demo.git
    
    cd Paddle-Lite-Demo
    mkdir _data     # create a _data folder for the inference library, the model, and the converted model
    cd _data
    

    2. Download the C++ inference library for x86

    You can build it from source, or download a prebuilt package from the release page:
    https://github.com/PaddlePaddle/Paddle-Lite/releases

    Below we use the prebuilt v2.12 inference_lite_lib.x86.linux.with_log.tar.gz as an example:

    wget https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.12/inference_lite_lib.x86.linux.with_log.tar.gz
    tar zxf inference_lite_lib.x86.linux.with_log.tar.gz
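
    After extraction you get an inference_lite_lib.with_log/ directory: the headers and libraries live under cxx/include and cxx/lib, and MKL sits under third_party/mklml. This is the layout that run.sh references below.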
    

    3. Download and convert the model

    Download the model:

    wget http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz
    tar zxf mobilenet_v1.tar.gz
    

    Model conversion is done with the opt tool. There are several ways to obtain opt; below we get it via pip:

    python -m pip install paddlelite   # installing paddlelite provides the opt tool
    
    paddle_lite_opt --model_dir=./mobilenet_v1 --optimize_out_type=naive_buffer --optimize_out=./mobilenet_v1_opt --valid_targets=x86
    

    paddle_lite_opt becomes available once paddlelite is installed as above.

    Note --valid_targets=x86, which selects the x86 platform.

    This produces the converted model file mobilenet_v1_opt.nb.
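
    Before moving on, you can sanity-check that the converted .nb file loads. A minimal sketch (the file name comes from the step above; link it against the downloaded library the same way the demo is linked):

    // check_model.cc -- minimal load check for the converted model (a sketch)
    #include "paddle_api.h"
    #include <iostream>

    int main() {
        paddle::lite_api::MobileConfig config;
        config.set_model_from_file("./mobilenet_v1_opt.nb");
        auto predictor = paddle::lite_api::CreatePaddlePredictor<
            paddle::lite_api::MobileConfig>(config);
        std::cout << "model loaded, " << predictor->GetInputNames().size()
                  << " input(s)" << std::endl;
        return 0;
    }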

    4. Adapt the C++ project in the demo

    We modify the example under image_classification/linux/app/cxx/image_classification/.

    This example was written for ARM Linux and uses NEON intrinsics for acceleration, so to run it on x86 we replace the NEON code with a plain C++ implementation.

    The main changes are to CMakeLists.txt, image_classification.cc, and run.sh.

    The complete files are listed at the end; the main changes are as follows:

    4.1 CMakeLists.txt

    Comment out the two lines below so the project builds on x86; the original project only allowed armv7hf or armv8:

    else()
        # message(FATAL_ERROR "Unknown arch abi ${TARGET_ARCH_ABI}, only support armv8 and armv7hf.")
        # return()
    endif()
    

    Add references to MKL. MKL is a dependency of Paddle Lite, and the library ships inside the Paddle Lite package:

    set(MKLML_DIR "${PADDLE_LITE_DIR}/../third_party/mklml/")
    link_directories(${MKLML_DIR}/lib)
    include_directories(${MKLML_DIR}/include)
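
    Note that at runtime the MKL shared libraries (libmklml_intel.so and libiomp5.so under third_party/mklml/lib) must also be on LD_LIBRARY_PATH; the run.sh changes below take care of this.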
    

    4.2 image_classification.cc

    Delete the NEON code; there are two places:

    #include <arm_neon.h>         // NOLINT

    the neon_mean_scale() function and its call sites

    Add the plain C++ implementation:

    // fill tensor with mean and scale and trans layout: nhwc -> nchw, cpp code
    void cpp_mean_scale(const float *din, float *dout, int size, float *mean,
                        float *scale)
    {
        float *dout_c0 = dout;
        float *dout_c1 = dout + size;
        float *dout_c2 = dout + size * 2;
    
        for (int i = 0; i < size; ++i)
        {
            *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
            *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
            *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
        }
    }
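
    The function makes one pass over the interleaved NHWC buffer and writes each channel into its own contiguous plane, which is the NCHW layout the input tensor expects. A quick sanity check (values are made up for illustration):

    // two RGB pixels, interleaved (NHWC); made-up values
    float in[6] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f};
    float out[6];
    float mean[3] = {0.485f, 0.456f, 0.406f};
    float scale[3] = {0.229f, 0.224f, 0.225f};
    cpp_mean_scale(in, out, /*size=*/2, mean, scale);
    // out[0..1] is now the R plane, out[2..3] the G plane, out[4..5] the B plane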
    
    

    The code as written targets OpenCV 3.x; if you are using OpenCV 4.x, change a few enums:

            // cv::cvtColor(resize_image, resize_image, CV_BGRA2RGB);       // for OpenCV 3.x
            cv::cvtColor(resize_image, resize_image, cv::COLOR_BGRA2RGB); // for OpenCV 4.x
    
    
            // cap.set(CV_CAP_PROP_FRAME_WIDTH, 640);   // for OpenCV 3.x
            // cap.set(CV_CAP_PROP_FRAME_HEIGHT, 480);
            cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); // for OpenCV 4.x
            cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
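
    If you want the same source to build against both versions, you can branch on OpenCV's CV_VERSION_MAJOR macro instead of editing the calls by hand (a sketch; the macro is provided by the OpenCV headers):

    #if CV_VERSION_MAJOR >= 4
        cv::cvtColor(resize_image, resize_image, cv::COLOR_BGRA2RGB);
    #else
        cv::cvtColor(resize_image, resize_image, CV_BGRA2RGB);
    #endif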
    

    4.3 run.sh

    The main changes:

    1. Set TARGET_ARCH_ABI=x86.
    2. Set PADDLE_LITE_DIR to the path of the inference library you downloaded.
    3. Set MODELS_DIR to the directory holding your converted model, and change the model file name to the converted name.
    4. Add the MKL lib path to LD_LIBRARY_PATH.
    5. Everything else can stay as it was.

    The resulting script:

    #!/bin/bash
    
    # configure
    TARGET_ARCH_ABI=x86 
    # TARGET_ARCH_ABI=armv8 # for RK3399, set to default arch abi
    #TARGET_ARCH_ABI=armv7hf # for Raspberry Pi 3B
    PADDLE_LITE_DIR="$(pwd)/../../../../../_data/inference_lite_lib.with_log/cxx"
    IMAGES_DIR="$(pwd)/../../../../assets/images"
    LABELS_DIR="$(pwd)/../../../../assets/labels"
    MODELS_DIR="$(pwd)/../../../../../_data"
    if [ "x$1" != "x" ]; then
        TARGET_ARCH_ABI=$1
    fi
    
    # build
    rm -rf build
    mkdir build
    cd build
    cmake -DPADDLE_LITE_DIR=${PADDLE_LITE_DIR} -DTARGET_ARCH_ABI=${TARGET_ARCH_ABI} ..
    make
    
    #run
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_LITE_DIR}/lib:${PADDLE_LITE_DIR}/../third_party/mklml/lib
    ./image_classification ${MODELS_DIR}/mobilenet_v1_opt.nb ${LABELS_DIR}/labels.txt 3 ${IMAGES_DIR}/tabby_cat.jpg ./result.jpg
    

    5. Run the image classification task

    Go to the project directory and run:

    cd ..
    cd image_classification/linux/app/cxx/image_classification/
    
    sh run.sh
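
    Note: the demo calls cv::imshow, which requires a display. On a headless machine you can comment out the imshow/waitKey calls in run_model and use the saved result.jpg instead.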
    

    After it runs, a result image is displayed (the input image with the top-k labels drawn on it).

    After closing the image window, the console prints the classification results:

    ====== output summary ====== 
    results: 3
    Top0  tabby, tabby cat - 0.475015
    Top1  Egyptian cat - 0.409479
    Top2  tiger cat - 0.095745
    Preprocess time: 0.414000 ms
    Prediction time: 13.263800 ms
    Postprocess time: 0.116000 ms
    

    6. Complete code of the modified files

    6.1 CMakeLists.txt

    cmake_minimum_required(VERSION 3.10)
    set(CMAKE_SYSTEM_NAME Linux)
    if(TARGET_ARCH_ABI STREQUAL "armv8")
        set(CMAKE_SYSTEM_PROCESSOR aarch64)
        set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
        set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
    elseif(TARGET_ARCH_ABI STREQUAL "armv7hf")
        set(CMAKE_SYSTEM_PROCESSOR arm)
        set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
        set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
    else()
        # message(FATAL_ERROR "Unknown arch abi ${TARGET_ARCH_ABI}, only support armv8 and armv7hf.")
        # return()
    endif()
    
    project(image_classification)
    message(STATUS "TARGET ARCH ABI: ${TARGET_ARCH_ABI}")
    message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")
    include_directories(${PADDLE_LITE_DIR}/include)
    link_directories(${PADDLE_LITE_DIR}/lib)
    
    set(MKLML_DIR "${PADDLE_LITE_DIR}/../third_party/mklml/")
    link_directories(${MKLML_DIR}/lib)
    include_directories(${MKLML_DIR}/include)
    
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
    if(TARGET_ARCH_ABI STREQUAL "armv8")
        set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
        set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}")
    elseif(TARGET_ARCH_ABI STREQUAL "armv7hf")
        set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
        set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" )
    endif()
    find_package(OpenMP REQUIRED)
    if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
        message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
        message(STATUS "OpenMP C flags:  ${OpenMP_C_FLAGS}")
        message(STATUS "OpenMP CXX flags:  ${OpenMP_CXX_FLAGS}")
        message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES:  ${OpenMP_CXX_LIB_NAMES}")
        message(STATUS "OpenMP OpenMP_CXX_LIBRARIES:  ${OpenMP_CXX_LIBRARIES}")
    else()
        message(FATAL_ERROR "Could not find OpenMP!")
        return()
    endif()
    find_package(OpenCV REQUIRED)
    if(OpenCV_FOUND OR OpenCV_CXX_FOUND)
        include_directories(${OpenCV_INCLUDE_DIRS})
        message(STATUS "OpenCV library status:")
        message(STATUS "    version: ${OpenCV_VERSION}")
        message(STATUS "    libraries: ${OpenCV_LIBS}")
        message(STATUS "    include path: ${OpenCV_INCLUDE_DIRS}")
    else()
        message(FATAL_ERROR "Could not find OpenCV!")
        return()
    endif()
    add_executable(image_classification image_classification.cc)
    target_link_libraries(image_classification paddle_light_api_shared ${OpenCV_LIBS})
    
    

    6.2 image_classification.cc

    // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
    //
    // Licensed under the Apache License, Version 2.0 (the "License");
    // you may not use this file except in compliance with the License.
    // You may obtain a copy of the License at
    //
    //     http://www.apache.org/licenses/LICENSE-2.0
    //
    // Unless required by applicable law or agreed to in writing, software
    // distributed under the License is distributed on an "AS IS" BASIS,
    // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    // See the License for the specific language governing permissions and
    // limitations under the License.
    
    #include "paddle_api.h"       // NOLINT
    #include <fstream>            // NOLINT
    #include <limits>             // NOLINT
    #include <opencv2/opencv.hpp> // NOLINT
    #include <stdio.h>            // NOLINT
    #include <sys/time.h>         // NOLINT
    #include <unistd.h>           // NOLINT
    #include <vector>             // NOLINT
    /////////////////////////////////////////////////////////////////////////
    // If this demo is linked to static library: libpaddle_api_light_bundled.a
    // , you should include `paddle_use_ops.h` and `paddle_use_kernels.h` to
    // avoid linking errors such as `unsupport ops or kernels`.
    /////////////////////////////////////////////////////////////////////////
    // #include "paddle_use_kernels.h"  // NOLINT
    // #include "paddle_use_ops.h"      // NOLINT
    
    int WARMUP_COUNT = 0;
    int REPEAT_COUNT = 1;
    const int CPU_THREAD_NUM = 2;
    const paddle::lite_api::PowerMode CPU_POWER_MODE =
        paddle::lite_api::PowerMode::LITE_POWER_HIGH;
    const std::vector<int64_t> INPUT_SHAPE = {1, 3, 224, 224};
    
    struct RESULT
    {
        std::string class_name;
        int class_id;
        float score;
    };
    using namespace paddle::lite_api; // NOLINT
    
    inline int64_t get_current_us()
    {
        struct timeval time;
        gettimeofday(&time, NULL);
        return 1000000LL * (int64_t)time.tv_sec + (int64_t)time.tv_usec;
    }
    
    std::vector<std::string> load_labels(const std::string &path)
    {
        std::ifstream file;
        std::vector<std::string> labels;
        file.open(path);
        while (file)
        {
            std::string line;
            std::getline(file, line);
            std::string::size_type pos = line.find(" ");
            if (pos != std::string::npos)
            {
                line = line.substr(pos);
            }
            labels.push_back(line);
        }
        file.clear();
        file.close();
        return labels;
    }
    
    // fill tensor with mean and scale and trans layout: nhwc -> nchw, cpp code
    void cpp_mean_scale(const float *din, float *dout, int size, float *mean,
                        float *scale)
    {
        float *dout_c0 = dout;
        float *dout_c1 = dout + size;
        float *dout_c2 = dout + size * 2;
    
        for (int i = 0; i < size; ++i)
        {
            *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
            *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
            *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
        }
    }
    
    void pre_process(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
                     int width, int height)
    {
        // Prepare input data from image
        std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
        input_tensor->Resize({1, 3, height, width});
        // read img and pre-process
        float means[3] = {0.485f, 0.456f, 0.406f};
        float scales[3] = {0.229f, 0.224f, 0.225f};
        cv::Mat resize_image;
    cv::resize(img, resize_image, cv::Size(width, height), 0, 0); // cv::Size is (width, height)
        if (resize_image.channels() == 4)
        {
            // cv::cvtColor(resize_image, resize_image, CV_BGRA2RGB);       // for OpenCV 3.x
            cv::cvtColor(resize_image, resize_image, cv::COLOR_BGRA2RGB); // for OpenCV 4.x
        }
    
        cv::Mat norm_image;
        resize_image.convertTo(norm_image, CV_32FC3, 1 / 255.f);
        const float *dimg = reinterpret_cast<const float *>(norm_image.data);
        auto *data = input_tensor->mutable_data<float>();
        cpp_mean_scale(dimg, data, width * height, means, scales);
    }
    
    std::vector<RESULT>
    post_process(std::shared_ptr<PaddlePredictor> predictor, const int topk,
                 const std::vector<std::string> &labels, // NOLINT
                 cv::Mat &output_image)
    { // NOLINT
        std::unique_ptr<const Tensor> output_tensor(
            std::move(predictor->GetOutput(0)));
        auto *scores = output_tensor->data<float>();
        auto shape_out = output_tensor->shape();
        int64_t size = 1;
        for (auto &i : shape_out)
        {
            size *= i;
        }
        std::vector<std::pair<float, int>> vec;
        vec.resize(size);
        for (int i = 0; i < size; i++)
        {
            vec[i] = std::make_pair(scores[i], i);
        }
    
        std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                          std::greater<std::pair<float, int>>());
    
        std::vector<RESULT> results(topk);
        for (int i = 0; i < topk; i++)
        {
            float score = vec[i].first;
            int index = vec[i].second;
            results[i].class_name = "Unknown";
            if (index >= 0 && index < labels.size())
            {
                results[i].class_name = labels[index];
            }
            results[i].score = score;
            cv::putText(output_image,
                        "Top" + std::to_string(i + 1) + "." + results[i].class_name +
                            ":" + std::to_string(results[i].score),
                        cv::Point2d(5, i * 18 + 20), cv::FONT_HERSHEY_PLAIN, 1,
                        cv::Scalar(51, 255, 255));
        }
        return results;
    }
    
    cv::Mat process(const std::string model_file, cv::Mat &input_image, // NOLINT
                    const std::vector<std::string> &word_labels,        // NOLINT
                    const int topk,
                    std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor)
    {
        // Preprocess image and fill the data of input tensor
        double preprocess_start_time = get_current_us();
        pre_process(predictor, input_image, INPUT_SHAPE[3], INPUT_SHAPE[2]);
        double preprocess_end_time = get_current_us();
        double preprocess_time =
            (preprocess_end_time - preprocess_start_time) / 1000.0f;
    
        // Run predictor
        // warm up to skip the first inference and get more stable time, remove it in
        // actual products
        for (int i = 0; i < WARMUP_COUNT; i++)
        {
            predictor->Run();
        }
        // repeat to obtain the average time, set REPEAT_COUNT=1 in actual products
        double sum_duration = 0.0;
        double max_duration = 1e-5;
        double min_duration = 1e5;
        double avg_duration = -1;
        for (int i = 0; i < REPEAT_COUNT; i++)
        {
            auto start = get_current_us();
            predictor->Run();
            auto end = get_current_us();
            double cur_time_cost = (end - start) / 1000.0f;
            if (cur_time_cost > max_duration)
            {
                max_duration = cur_time_cost;
            }
            if (cur_time_cost < min_duration)
            {
                min_duration = cur_time_cost;
            }
            sum_duration += cur_time_cost;
            printf("iter %d cost: %f ms\n", i, cur_time_cost);
        }
        avg_duration = sum_duration / static_cast<float>(REPEAT_COUNT);
        printf("warmup: %d repeat: %d, average: %f ms, max: %f ms, min: %f ms\n",
               WARMUP_COUNT, REPEAT_COUNT, avg_duration, max_duration, min_duration);
    
        // 5. Get output and postprocess to output detected objects
        std::cout << "\n====== output summary ====== " << std::endl;
        cv::Mat output_image = input_image.clone();
        double postprocess_start_time = get_current_us();
        std::vector<RESULT> results =
            post_process(predictor, topk, word_labels, output_image);
        double postprocess_end_time = get_current_us();
        double postprocess_time =
            (postprocess_end_time - postprocess_start_time) / 1000.0f;
    
        printf("results: %ld\n", results.size());
        for (int i = 0; i < results.size(); i++)
        {
            printf("Top%d %s - %f\n", i, results[i].class_name.c_str(),
                   results[i].score);
        }
        printf("Preprocess time: %f ms\n", preprocess_time);
        printf("Prediction time: %f ms\n", avg_duration);
        printf("Postprocess time: %f ms\n\n", postprocess_time);
        return output_image;
    }
    
    void run_model(const std::string model_file,
                   const std::vector<std::string> &word_labels, const int topk,
                   bool use_cap, std::string input_image_path,
                   std::string output_image_path)
    {
        // 1. Set MobileConfig
        MobileConfig config;
        config.set_model_from_file(model_file);
        config.set_power_mode(CPU_POWER_MODE);
        config.set_threads(CPU_THREAD_NUM);
    
        // 2. Create PaddlePredictor by MobileConfig
        std::shared_ptr<PaddlePredictor> predictor =
            CreatePaddlePredictor<MobileConfig>(config);
    
        // 3. Prepare input data from image
        if (use_cap)
        {
            cv::VideoCapture cap(-1);
            // cap.set(CV_CAP_PROP_FRAME_WIDTH, 640);   // for OpenCV 3.x
            // cap.set(CV_CAP_PROP_FRAME_HEIGHT, 480);
            cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); // for OpenCV 4.x
            cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
            if (!cap.isOpened())
            {
                return;
            }
            while (1)
            {
                cv::Mat input_image;
                cap >> input_image;
                cv::Mat output_image =
                    process(model_file, input_image, word_labels, topk, predictor);
                cv::imshow("image classification", output_image);
                if (cv::waitKey(1) == char('q'))
                { // NOLINT
                    break;
                }
            }
            cap.release();
            cv::destroyAllWindows();
        }
        else
        {
            cv::Mat input_image = cv::imread(input_image_path, 1);
            cv::Mat output_image =
                process(model_file, input_image, word_labels, topk, predictor);
            cv::imwrite(output_image_path, output_image);
            cv::imshow("image classification", output_image);
            cv::waitKey(0);
        }
    }
    int main(int argc, char **argv)
    {
        if (argc < 3)
        {
            std::cerr << "[ERROR] usage: " << argv[0]
                      << " ./image_classification_demo model_dir label_path [top_k] "
                      << "[input_image_path] [output_image_path] \n"
                      << "use images from camera if input_image_path isn't provided \n";
            exit(1);
        }
    
        std::string model_path = argv[1];
        std::string label_path = argv[2];
        int topk = 1;
        if (argc > 3)
        {
            topk = atoi(argv[3]);
        }
        // Load Labels
        std::vector<std::string> word_labels = load_labels(label_path);
        std::string input_image_path = "";
        std::string output_image_path = "";
        bool use_cap = true;
        if (argc > 4)
        {
            input_image_path = argv[4];
            WARMUP_COUNT = 1;
            REPEAT_COUNT = 5;
            use_cap = false;
        }
    if (argc > 5) // argv[5] only exists when an output path was passed
        {
            output_image_path = argv[5];
        }
    
        run_model(model_path, word_labels, topk, use_cap, input_image_path,
                  output_image_path);
    
        return 0;
    }
    

    6.3 run.sh

    #!/bin/bash
    
    # configure
    TARGET_ARCH_ABI=x86 
    # TARGET_ARCH_ABI=armv8 # for RK3399, set to default arch abi
    #TARGET_ARCH_ABI=armv7hf # for Raspberry Pi 3B
    PADDLE_LITE_DIR="$(pwd)/../../../../../_data/inference_lite_lib.with_log/cxx"
    IMAGES_DIR="$(pwd)/../../../../assets/images"
    LABELS_DIR="$(pwd)/../../../../assets/labels"
    MODELS_DIR="$(pwd)/../../../../../_data"
    if [ "x$1" != "x" ]; then
        TARGET_ARCH_ABI=$1
    fi
    
    # build
    rm -rf build
    mkdir build
    cd build
    cmake -DPADDLE_LITE_DIR=${PADDLE_LITE_DIR} -DTARGET_ARCH_ABI=${TARGET_ARCH_ABI} ..
    make
    
    #run
    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_LITE_DIR}/lib:${PADDLE_LITE_DIR}/../third_party/mklml/lib
    ./image_classification ${MODELS_DIR}/mobilenet_v1_opt.nb ${LABELS_DIR}/labels.txt 3 ${IMAGES_DIR}/tabby_cat.jpg ./result.jpg
    
