This article runs the example from the official Paddle-Lite-Demo repository with a few small modifications.
The example here was run on Ubuntu.
1. Download Paddle-Lite-Demo
git clone https://github.com/PaddlePaddle/Paddle-Lite-Demo.git
cd Paddle-Lite-Demo
mkdir _data # create a _data folder to hold the inference library, the model files, and the converted model
cd _data
2. 下载x86平台的cpp预测库
可以源码编译,也可以从release页面直接下载编译好的
https://github.com/PaddlePaddle/Paddle-Lite/releases
下面以直接下载v2.12
的inference_lite_lib.x86.linux.with_log.tar.gz
为例
wget https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.12/inference_lite_lib.x86.linux.with_log.tar.gz
tar zxf inference_lite_lib.x86.linux.with_log.tar.gz
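The parts of the extracted package that this article relies on are laid out roughly like this (a sketch; exact names can vary between releases):
inference_lite_lib.with_log/     # extracted directory
  cxx/
    include/                     # paddle_api.h and the other headers
    lib/                         # libpaddle_light_api_shared.so, which the demo links against
  third_party/
    mklml/                       # MKL dependency used by the x86 backend
      include/
      lib/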
3. Download and convert the model
Download the model:
wget http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz
tar zxf mobilenet_v1.tar.gz
Model conversion is done with the opt tool, which can be obtained in several ways; below we get it via Python:
python -m pip install paddlelite # install paddlelite, which provides the opt tool
paddle_lite_opt --model_dir=./mobilenet_v1 --optimize_out_type=naive_buffer --optimize_out=./mobilenet_v1_opt --valid_targets=x86
paddle_lite_opt is the command-line tool that becomes available after installing paddlelite as above.
Note --valid_targets=x86, which selects the x86 platform.
This generates the converted model file mobilenet_v1_opt.nb.
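Before modifying the demo, the converted model can be sanity-checked with a minimal standalone program (a sketch, not part of the demo: the file name and the dummy zero input are my own assumptions, while the API calls mirror the demo code in section 6.2):
// smoke_test.cc — build roughly like (paths are placeholders):
//   g++ -std=c++11 smoke_test.cc -I<lib>/cxx/include -L<lib>/cxx/lib \
//       -lpaddle_light_api_shared -o smoke_test
#include "paddle_api.h" // NOLINT
#include <iostream>

int main()
{
  // load the converted naive_buffer model
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file("mobilenet_v1_opt.nb");
  auto predictor =
      paddle::lite_api::CreatePaddlePredictor<paddle::lite_api::MobileConfig>(
          config);
  // feed an all-zero 1x3x224x224 tensor, just to prove the graph runs
  auto input = predictor->GetInput(0);
  input->Resize({1, 3, 224, 224});
  float *data = input->mutable_data<float>();
  for (int i = 0; i < 1 * 3 * 224 * 224; ++i)
  {
    data[i] = 0.f;
  }
  predictor->Run();
  // print the output shape; expect "1 1000" for mobilenet_v1
  auto output = predictor->GetOutput(0);
  std::cout << "output shape:";
  for (auto d : output->shape())
  {
    std::cout << " " << d;
  }
  std::cout << std::endl;
  return 0;
}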
4. Adapt the C++ project in the demo
Modify the example in image_classification/linux/app/cxx/image_classification/.
The example was written for armlinux and uses NEON intrinsics for acceleration, so to run it on x86 the NEON code can be replaced with a plain C++ implementation.
The files to modify are the project's CMakeLists.txt, image_classification.cc, and run.sh.
The complete code of these files is given at the end; the main changes are as follows:
4.1 CMakeLists.txt
Comment out the two lines below so that the project builds on x86; the original project only runs on armv7hf and armv8. With them commented out, an unrecognized arch abi such as x86 simply falls through and the host's default compilers are used:
else()
  # message(FATAL_ERROR "Unknown arch abi ${TARGET_ARCH_ABI}, only support armv8 and armv7hf.")
  # return()
endif()
Add the MKL include and library paths. MKL is a dependency of Paddle Lite's x86 build and ships inside the inference library package (run.sh below also adds its lib directory to LD_LIBRARY_PATH, since the shared inference library loads it at runtime):
set(MKLML_DIR "${PADDLE_LITE_DIR}/../third_party/mklml/")
link_directories(${MKLML_DIR}/lib)
include_directories(${MKLML_DIR}/include)
4.2 image_classification.cc
Remove the NEON code; there are two places:
#include <arm_neon.h> // NOLINT
the neon_mean_scale() function and its call site
Then add a plain C++ implementation instead:
// fill tensor with mean and scale and trans layout: nhwc -> nchw, cpp code
void cpp_mean_scale(const float *din, float *dout, int size, float *mean,
                    float *scale)
{
  float *dout_c0 = dout;
  float *dout_c1 = dout + size;
  float *dout_c2 = dout + size * 2;
  for (int i = 0; i < size; ++i)
  {
    *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
    *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
    *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
  }
}
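To make the layout change concrete, here is a tiny standalone check (my own sketch, not part of the demo) that feeds two interleaved NHWC pixels through cpp_mean_scale and prints the planar NCHW result:
// layout_check.cc — compile with: g++ -std=c++11 layout_check.cc -o layout_check
#include <cstdio>

// same implementation as above
void cpp_mean_scale(const float *din, float *dout, int size, float *mean,
                    float *scale)
{
  float *dout_c0 = dout;
  float *dout_c1 = dout + size;
  float *dout_c2 = dout + size * 2;
  for (int i = 0; i < size; ++i)
  {
    *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
    *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
    *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
  }
}

int main()
{
  // two pixels, three channels, interleaved NHWC: R0 G0 B0 R1 G1 B1
  float din[6] = {10, 20, 30, 40, 50, 60};
  float dout[6] = {0};
  float mean[3] = {0, 0, 0};
  float scale[3] = {1, 1, 1};
  cpp_mean_scale(din, dout, 2, mean, scale);
  // prints the planar NCHW order: 10 40 20 50 30 60 (R-plane, G-plane, B-plane)
  for (int i = 0; i < 6; ++i) printf("%g ", dout[i]);
  printf("\n");
  return 0;
}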
The code targets OpenCV 3.x; if you are using OpenCV 4.x, change a few enum names (the cv::COLOR_* and cv::CAP_PROP_* forms also exist in OpenCV 3.x, so the 4.x variants below should build against both; 4.x merely dropped the legacy CV_* macros from the default headers):
// cv::cvtColor(resize_image, resize_image, CV_BGRA2RGB); // for OpenCV 3.x
cv::cvtColor(resize_image, resize_image, cv::COLOR_BGRA2RGB); // for OpenCV 4.x
// cap.set(CV_CAP_PROP_FRAME_WIDTH, 640); // for OpenCV 3.x
// cap.set(CV_CAP_PROP_FRAME_HEIGHT, 480);
cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); // for OpenCV 4.x
cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
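If the same source has to build against both versions, a compile-time switch also works (a sketch; CV_VERSION_MAJOR is defined in opencv2/core/version.hpp for both 3.x and 4.x):
#if CV_VERSION_MAJOR >= 4
cv::cvtColor(resize_image, resize_image, cv::COLOR_BGRA2RGB);
#else
cv::cvtColor(resize_image, resize_image, CV_BGRA2RGB);
#endif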
4.3 run.sh
The main changes are:
- TARGET_ARCH_ABI=x86
- PADDLE_LITE_DIR set to the path of the inference library you downloaded
- MODELS_DIR set to the path of your converted model, with the model filename changed to the converted model's name
- the MKL lib path added to LD_LIBRARY_PATH
- everything else can stay as it was
#!/bin/bash
# configure
TARGET_ARCH_ABI=x86
# TARGET_ARCH_ABI=armv8 # for RK3399, set to default arch abi
#TARGET_ARCH_ABI=armv7hf # for Raspberry Pi 3B
PADDLE_LITE_DIR="$(pwd)/../../../../../_data/inference_lite_lib.with_log/cxx"
IMAGES_DIR="$(pwd)/../../../../assets/images"
LABELS_DIR="$(pwd)/../../../../assets/labels"
MODELS_DIR="$(pwd)/../../../../../_data"
if [ "x$1" != "x" ]; then
  TARGET_ARCH_ABI=$1
fi
# build
rm -rf build
mkdir build
cd build
cmake -DPADDLE_LITE_DIR=${PADDLE_LITE_DIR} -DTARGET_ARCH_ABI=${TARGET_ARCH_ABI} ..
make
#run
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_LITE_DIR}/lib:${PADDLE_LITE_DIR}/../third_party/mklml/lib
./image_classification ${MODELS_DIR}/mobilenet_v1_opt.nb ${LABELS_DIR}/labels.txt 3 ${IMAGES_DIR}/tabby_cat.jpg ./result.jpg
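run.sh also accepts an optional architecture argument; without one it uses the TARGET_ARCH_ABI=x86 default set above:
sh run.sh # build and run for x86 (the default configured here)
sh run.sh armv8 # cross-compile for armv8 instead (needs the aarch64-linux-gnu toolchain from CMakeLists.txt)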
5. Run the image classification task
Go to the project directory and run the following commands:
cd ..
cd image_classification/linux/app/cxx/image_classification/
sh run.sh
After it finishes, the annotated result image is displayed.
After closing the image window, the console prints the classification results (Top-3 because run.sh passes 3 as the topk argument):
====== output summary ======
results: 3
Top0 tabby, tabby cat - 0.475015
Top1 Egyptian cat - 0.409479
Top2 tiger cat - 0.095745
Preprocess time: 0.414000 ms
Prediction time: 13.263800 ms
Postprocess time: 0.116000 ms
6. Complete code of the modified files
6.1 CMakeLists.txt
cmake_minimum_required(VERSION 3.10)
set(CMAKE_SYSTEM_NAME Linux)
if(TARGET_ARCH_ABI STREQUAL "armv8")
  set(CMAKE_SYSTEM_PROCESSOR aarch64)
  set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
  set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
elseif(TARGET_ARCH_ABI STREQUAL "armv7hf")
  set(CMAKE_SYSTEM_PROCESSOR arm)
  set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc")
  set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++")
else()
  # message(FATAL_ERROR "Unknown arch abi ${TARGET_ARCH_ABI}, only support armv8 and armv7hf.")
  # return()
endif()
project(image_classification)
message(STATUS "TARGET ARCH ABI: ${TARGET_ARCH_ABI}")
message(STATUS "PADDLE LITE DIR: ${PADDLE_LITE_DIR}")
include_directories(${PADDLE_LITE_DIR}/include)
link_directories(${PADDLE_LITE_DIR}/lib)
set(MKLML_DIR "${PADDLE_LITE_DIR}/../third_party/mklml/")
link_directories(${MKLML_DIR}/lib)
include_directories(${MKLML_DIR}/include)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if(TARGET_ARCH_ABI STREQUAL "armv8")
  set(CMAKE_CXX_FLAGS "-march=armv8-a ${CMAKE_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "-march=armv8-a ${CMAKE_C_FLAGS}")
elseif(TARGET_ARCH_ABI STREQUAL "armv7hf")
  set(CMAKE_CXX_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}")
endif()
find_package(OpenMP REQUIRED)
if(OpenMP_FOUND OR OpenMP_CXX_FOUND)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}")
  message(STATUS "OpenMP C flags: ${OpenMP_C_FLAGS}")
  message(STATUS "OpenMP CXX flags: ${OpenMP_CXX_FLAGS}")
  message(STATUS "OpenMP OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}")
  message(STATUS "OpenMP OpenMP_CXX_LIBRARIES: ${OpenMP_CXX_LIBRARIES}")
else()
  message(FATAL_ERROR "Could not find OpenMP!")
  return()
endif()
find_package(OpenCV REQUIRED)
if(OpenCV_FOUND OR OpenCV_CXX_FOUND)
  include_directories(${OpenCV_INCLUDE_DIRS})
  message(STATUS "OpenCV library status:")
  message(STATUS "  version: ${OpenCV_VERSION}")
  message(STATUS "  libraries: ${OpenCV_LIBS}")
  message(STATUS "  include path: ${OpenCV_INCLUDE_DIRS}")
else()
  message(FATAL_ERROR "Could not find OpenCV!")
  return()
endif()
add_executable(image_classification image_classification.cc)
target_link_libraries(image_classification paddle_light_api_shared ${OpenCV_LIBS})
6.2 image_classification.cc
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle_api.h" // NOLINT
#include <fstream> // NOLINT
#include <limits> // NOLINT
#include <opencv2/opencv.hpp> // NOLINT
#include <stdio.h> // NOLINT
#include <sys/time.h> // NOLINT
#include <unistd.h> // NOLINT
#include <vector> // NOLINT
/////////////////////////////////////////////////////////////////////////
// If this demo is linked to static library: libpaddle_api_light_bundled.a
// , you should include `paddle_use_ops.h` and `paddle_use_kernels.h` to
// avoid linking errors such as `unsupport ops or kernels`.
/////////////////////////////////////////////////////////////////////////
// #include "paddle_use_kernels.h" // NOLINT
// #include "paddle_use_ops.h" // NOLINT
int WARMUP_COUNT = 0;
int REPEAT_COUNT = 1;
const int CPU_THREAD_NUM = 2;
const paddle::lite_api::PowerMode CPU_POWER_MODE =
    paddle::lite_api::PowerMode::LITE_POWER_HIGH;
const std::vector<int64_t> INPUT_SHAPE = {1, 3, 224, 224};
struct RESULT
{
  std::string class_name;
  int class_id;
  float score;
};
using namespace paddle::lite_api; // NOLINT
inline int64_t get_current_us()
{
  struct timeval time;
  gettimeofday(&time, NULL);
  return 1000000LL * (int64_t)time.tv_sec + (int64_t)time.tv_usec;
}
std::vector<std::string> load_labels(const std::string &path)
{
  std::ifstream file;
  std::vector<std::string> labels;
  file.open(path);
  while (file)
  {
    std::string line;
    std::getline(file, line);
    std::string::size_type pos = line.find(" ");
    if (pos != std::string::npos)
    {
      line = line.substr(pos);
    }
    labels.push_back(line);
  }
  file.clear();
  file.close();
  return labels;
}
// fill tensor with mean and scale and trans layout: nhwc -> nchw, cpp code
void cpp_mean_scale(const float *din, float *dout, int size, float *mean,
                    float *scale)
{
  float *dout_c0 = dout;
  float *dout_c1 = dout + size;
  float *dout_c2 = dout + size * 2;
  for (int i = 0; i < size; ++i)
  {
    *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
    *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
    *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
  }
}
void pre_process(std::shared_ptr<PaddlePredictor> predictor, cv::Mat img,
                 int width, int height)
{
  // Prepare input data from image
  std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
  input_tensor->Resize({1, 3, height, width});
  // read img and pre-process
  float means[3] = {0.485f, 0.456f, 0.406f};
  float scales[3] = {0.229f, 0.224f, 0.225f};
  cv::Mat resize_image;
  cv::resize(img, resize_image, cv::Size(width, height), 0, 0); // cv::Size takes (width, height)
  if (resize_image.channels() == 4)
  {
    // cv::cvtColor(resize_image, resize_image, CV_BGRA2RGB); // for OpenCV 3.x
    cv::cvtColor(resize_image, resize_image, cv::COLOR_BGRA2RGB); // for OpenCV 4.x
  }
  cv::Mat norm_image;
  resize_image.convertTo(norm_image, CV_32FC3, 1 / 255.f);
  const float *dimg = reinterpret_cast<const float *>(norm_image.data);
  auto *data = input_tensor->mutable_data<float>();
  cpp_mean_scale(dimg, data, width * height, means, scales);
}
std::vector<RESULT>
post_process(std::shared_ptr<PaddlePredictor> predictor, const int topk,
             const std::vector<std::string> &labels, // NOLINT
             cv::Mat &output_image)
{ // NOLINT
  std::unique_ptr<const Tensor> output_tensor(
      std::move(predictor->GetOutput(0)));
  auto *scores = output_tensor->data<float>();
  auto shape_out = output_tensor->shape();
  int64_t size = 1;
  for (auto &i : shape_out)
  {
    size *= i;
  }
  std::vector<std::pair<float, int>> vec;
  vec.resize(size);
  for (int i = 0; i < size; i++)
  {
    vec[i] = std::make_pair(scores[i], i);
  }
  std::partial_sort(vec.begin(), vec.begin() + topk, vec.end(),
                    std::greater<std::pair<float, int>>());
  std::vector<RESULT> results(topk);
  for (int i = 0; i < topk; i++)
  {
    float score = vec[i].first;
    int index = vec[i].second;
    results[i].class_name = "Unknown";
    if (index >= 0 && index < labels.size())
    {
      results[i].class_name = labels[index];
    }
    results[i].score = score;
    cv::putText(output_image,
                "Top" + std::to_string(i + 1) + "." + results[i].class_name +
                    ":" + std::to_string(results[i].score),
                cv::Point2d(5, i * 18 + 20), cv::FONT_HERSHEY_PLAIN, 1,
                cv::Scalar(51, 255, 255));
  }
  return results;
}
cv::Mat process(const std::string model_file, cv::Mat &input_image, // NOLINT
                const std::vector<std::string> &word_labels, // NOLINT
                const int topk,
                std::shared_ptr<paddle::lite_api::PaddlePredictor> predictor)
{
  // Preprocess image and fill the data of input tensor
  double preprocess_start_time = get_current_us();
  pre_process(predictor, input_image, INPUT_SHAPE[3], INPUT_SHAPE[2]);
  double preprocess_end_time = get_current_us();
  double preprocess_time =
      (preprocess_end_time - preprocess_start_time) / 1000.0f;
  // Run predictor
  // warm up to skip the first inference and get more stable time, remove it in
  // actual products
  for (int i = 0; i < WARMUP_COUNT; i++)
  {
    predictor->Run();
  }
  // repeat to obtain the average time, set REPEAT_COUNT=1 in actual products
  double sum_duration = 0.0;
  double max_duration = 1e-5;
  double min_duration = 1e5;
  double avg_duration = -1;
  for (int i = 0; i < REPEAT_COUNT; i++)
  {
    auto start = get_current_us();
    predictor->Run();
    auto end = get_current_us();
    double cur_time_cost = (end - start) / 1000.0f;
    if (cur_time_cost > max_duration)
    {
      max_duration = cur_time_cost;
    }
    if (cur_time_cost < min_duration)
    {
      min_duration = cur_time_cost;
    }
    sum_duration += cur_time_cost;
    printf("iter %d cost: %f ms\n", i, cur_time_cost);
  }
  avg_duration = sum_duration / static_cast<float>(REPEAT_COUNT);
  printf("warmup: %d repeat: %d, average: %f ms, max: %f ms, min: %f ms\n",
         WARMUP_COUNT, REPEAT_COUNT, avg_duration, max_duration, min_duration);
  // 5. Get output and postprocess to output detected objects
  std::cout << "\n====== output summary ====== " << std::endl;
  cv::Mat output_image = input_image.clone();
  double postprocess_start_time = get_current_us();
  std::vector<RESULT> results =
      post_process(predictor, topk, word_labels, output_image);
  double postprocess_end_time = get_current_us();
  double postprocess_time =
      (postprocess_end_time - postprocess_start_time) / 1000.0f;
  printf("results: %zu\n", results.size());
  for (int i = 0; i < results.size(); i++)
  {
    printf("Top%d %s - %f\n", i, results[i].class_name.c_str(),
           results[i].score);
  }
  printf("Preprocess time: %f ms\n", preprocess_time);
  printf("Prediction time: %f ms\n", avg_duration);
  printf("Postprocess time: %f ms\n\n", postprocess_time);
  return output_image;
}
void run_model(const std::string model_file,
               const std::vector<std::string> &word_labels, const int topk,
               bool use_cap, std::string input_image_path,
               std::string output_image_path)
{
  // 1. Set MobileConfig
  MobileConfig config;
  config.set_model_from_file(model_file);
  config.set_power_mode(CPU_POWER_MODE);
  config.set_threads(CPU_THREAD_NUM);
  // 2. Create PaddlePredictor by MobileConfig
  std::shared_ptr<PaddlePredictor> predictor =
      CreatePaddlePredictor<MobileConfig>(config);
  // 3. Prepare input data from image
  if (use_cap)
  {
    cv::VideoCapture cap(-1);
    // cap.set(CV_CAP_PROP_FRAME_WIDTH, 640); // for OpenCV 3.x
    // cap.set(CV_CAP_PROP_FRAME_HEIGHT, 480);
    cap.set(cv::CAP_PROP_FRAME_WIDTH, 640); // for OpenCV 4.x
    cap.set(cv::CAP_PROP_FRAME_HEIGHT, 480);
    if (!cap.isOpened())
    {
      return;
    }
    while (1)
    {
      cv::Mat input_image;
      cap >> input_image;
      cv::Mat output_image =
          process(model_file, input_image, word_labels, topk, predictor);
      cv::imshow("image classification", output_image);
      if (cv::waitKey(1) == char('q'))
      { // NOLINT
        break;
      }
    }
    cap.release();
    cv::destroyAllWindows();
  }
  else
  {
    cv::Mat input_image = cv::imread(input_image_path, 1);
    cv::Mat output_image =
        process(model_file, input_image, word_labels, topk, predictor);
    cv::imwrite(output_image_path, output_image);
    cv::imshow("image classification", output_image);
    cv::waitKey(0);
  }
}
int main(int argc, char **argv)
{
  if (argc < 3)
  {
    std::cerr << "[ERROR] usage: " << argv[0]
              << " model_path label_path [top_k] "
              << "[input_image_path] [output_image_path] \n"
              << "use images from camera if input_image_path isn't provided \n";
    exit(1);
  }
  std::string model_path = argv[1];
  std::string label_path = argv[2];
  int topk = 1;
  if (argc > 3)
  {
    topk = atoi(argv[3]);
  }
  // Load Labels
  std::vector<std::string> word_labels = load_labels(label_path);
  std::string input_image_path = "";
  std::string output_image_path = "";
  bool use_cap = true;
  if (argc > 4)
  {
    input_image_path = argv[4];
    WARMUP_COUNT = 1;
    REPEAT_COUNT = 5;
    use_cap = false;
  }
  if (argc > 5)
  {
    output_image_path = argv[5];
  }
  run_model(model_path, word_labels, topk, use_cap, input_image_path,
            output_image_path);
  return 0;
}
6.3 run.sh
#!/bin/bash
# configure
TARGET_ARCH_ABI=x86
# TARGET_ARCH_ABI=armv8 # for RK3399, set to default arch abi
#TARGET_ARCH_ABI=armv7hf # for Raspberry Pi 3B
PADDLE_LITE_DIR="$(pwd)/../../../../../_data/inference_lite_lib.with_log/cxx"
IMAGES_DIR="$(pwd)/../../../../assets/images"
LABELS_DIR="$(pwd)/../../../../assets/labels"
MODELS_DIR="$(pwd)/../../../../../_data"
if [ "x$1" != "x" ]; then
  TARGET_ARCH_ABI=$1
fi
# build
rm -rf build
mkdir build
cd build
cmake -DPADDLE_LITE_DIR=${PADDLE_LITE_DIR} -DTARGET_ARCH_ABI=${TARGET_ARCH_ABI} ..
make
#run
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${PADDLE_LITE_DIR}/lib:${PADDLE_LITE_DIR}/../third_party/mklml/lib
./image_classification ${MODELS_DIR}/mobilenet_v1_opt.nb ${LABELS_DIR}/labels.txt 3 ${IMAGES_DIR}/tabby_cat.jpg ./result.jpg