
add thread_num and threshold

jack 5 years ago
parent
commit
1a1f8c81cc

+ 4 - 2
deploy/cpp/demo/classifier.cpp

@@ -34,6 +34,7 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_int32(batch_size, 1, "Batch size of inference");
+DEFINE_int32(thread_num, omp_get_num_procs(), "Number of preprocessing threads");
 
 int main(int argc, char** argv) {
   // Parse command-line flags
@@ -75,12 +76,13 @@ int main(int argc, char** argv) {
       int im_vec_size = std::min((int)image_paths.size(), i + FLAGS_batch_size);      
       std::vector<cv::Mat> im_vec(im_vec_size - i);
       std::vector<PaddleX::ClsResult> results(im_vec_size - i, PaddleX::ClsResult());
-      #pragma omp parallel for num_threads(im_vec_size - i)
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
       for(int j = i; j < im_vec_size; ++j){
         im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
       }
       auto imread_end = system_clock::now();
-      model.predict(im_vec, results);
+      model.predict(im_vec, results, thread_num);
 
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += double(imread_duration.count()) * microseconds::period::num / microseconds::period::den;
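
Before this commit the pragma requested one thread per image left in the batch (`num_threads(im_vec_size - i)`); it now clamps the team to `--thread_num`, which defaults to `omp_get_num_procs()`. A minimal standalone sketch of the same clamping pattern, with the per-item work as a placeholder for `cv::imread`:

```cpp
#include <omp.h>

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> batch(5);            // stand-in for a tail batch of 5 images
  int requested = omp_get_num_procs();  // same default as the new --thread_num flag
  // Clamp the team size to the work available, as the commit does.
  int thread_num = std::min(requested, static_cast<int>(batch.size()));
  #pragma omp parallel for num_threads(thread_num)
  for (int i = 0; i < static_cast<int>(batch.size()); ++i) {
    batch[i] = i * i;  // placeholder for the cv::imread call in the demo
  }
  std::printf("ran with at most %d threads\n", thread_num);
  return 0;
}
```

Compile with `-fopenmp`; without it the pragma is ignored and the loop runs serially, so the clamped value is only an upper bound on the team size.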

+ 10 - 7
deploy/cpp/demo/detector.cpp

@@ -36,12 +36,14 @@ DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
-DEFINE_int32(batch_size, 1, "");
+DEFINE_int32(batch_size, 1, "Batch size of inference");
+DEFINE_double(threshold, 0.5, "Minimum score of the detected boxes to be visualized");
+DEFINE_int32(thread_num, omp_get_num_procs(), "Number of preprocessing threads");
 
 int main(int argc, char** argv) {
   // Parse command-line flags
   google::ParseCommandLineFlags(&argc, &argv, true);
-
+  
   if (FLAGS_model_dir == "") {
     std::cerr << "--model_dir need to be defined" << std::endl;
     return -1;
@@ -50,7 +52,7 @@ int main(int argc, char** argv) {
     std::cerr << "--image or --image_list need to be defined" << std::endl;
     return -1;
   }
-
+  std::cout << "Thread num: " << FLAGS_thread_num << std::endl;
   // Load the model
   PaddleX::Model model;
   model.Init(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_trt, FLAGS_gpu_id, FLAGS_key, FLAGS_batch_size);
@@ -78,12 +80,13 @@ int main(int argc, char** argv) {
       int im_vec_size = std::min((int)image_paths.size(), i + FLAGS_batch_size);
       std::vector<cv::Mat> im_vec(im_vec_size - i);
       std::vector<PaddleX::DetResult> results(im_vec_size - i, PaddleX::DetResult());
-      #pragma omp parallel for num_threads(im_vec_size - i)
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
       for(int j = i; j < im_vec_size; ++j){
         im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
       }
       auto imread_end = system_clock::now();
-      model.predict(im_vec, results);
+      model.predict(im_vec, results, thread_num);
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += double(imread_duration.count()) * microseconds::period::num / microseconds::period::den;
       auto end = system_clock::now();
@@ -106,7 +109,7 @@ int main(int argc, char** argv) {
       // Visualization
       for(int j = 0; j < im_vec_size - i; ++j) {
         cv::Mat vis_img =
-            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap, 0.5);
+            PaddleX::Visualize(im_vec[j], results[j], model.labels, colormap, FLAGS_threshold);
         std::string save_path =
             PaddleX::generate_save_path(FLAGS_save_dir, image_paths[i + j]);
         cv::imwrite(save_path, vis_img);
@@ -130,7 +133,7 @@ int main(int argc, char** argv) {
 
     // Visualization
     cv::Mat vis_img =
-        PaddleX::Visualize(im, result, model.labels, colormap, 0.5);
+        PaddleX::Visualize(im, result, model.labels, colormap, FLAGS_threshold);
     std::string save_path =
         PaddleX::generate_save_path(FLAGS_save_dir, FLAGS_image);
     cv::imwrite(save_path, vis_img);
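
Both `Visualize` calls previously hard-coded a `0.5` score cutoff; they now take the new `--threshold` flag, so the minimum confidence for a drawn box is configurable per run. A hedged sketch of the filtering a threshold implies (this `Box` struct is a stand-in, not the PaddleX type):

```cpp
#include <algorithm>
#include <iterator>
#include <vector>

struct Box { float score; /* coordinates omitted */ };  // stand-in, not PaddleX::Box

// Keep only the boxes whose confidence reaches the visualization threshold.
std::vector<Box> filter_for_drawing(const std::vector<Box>& boxes, double threshold) {
  std::vector<Box> kept;
  std::copy_if(boxes.begin(), boxes.end(), std::back_inserter(kept),
               [threshold](const Box& b) { return b.score >= threshold; });
  return kept;
}
```

Raising `--threshold` hides low-confidence detections; lowering it toward 0 draws everything the model proposes.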

+ 4 - 2
deploy/cpp/demo/segmenter.cpp

@@ -36,6 +36,7 @@ DEFINE_string(image, "", "Path of test image file");
 DEFINE_string(image_list, "", "Path of test image list file");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
 DEFINE_int32(batch_size, 1, "Batch size of inference");
+DEFINE_int32(thread_num, omp_get_num_procs(), "Number of preprocessing threads");
 
 int main(int argc, char** argv) {
   // Parse command-line flags
@@ -76,12 +77,13 @@ int main(int argc, char** argv) {
       int im_vec_size = std::min((int)image_paths.size(), i + FLAGS_batch_size);
       std::vector<cv::Mat> im_vec(im_vec_size - i);
       std::vector<PaddleX::SegResult> results(im_vec_size - i, PaddleX::SegResult());
-      #pragma omp parallel for num_threads(im_vec_size - i)
+      int thread_num = std::min(FLAGS_thread_num, im_vec_size - i);
+      #pragma omp parallel for num_threads(thread_num)
       for(int j = i; j < im_vec_size; ++j){
         im_vec[j - i] = std::move(cv::imread(image_paths[j], 1));
       }
       auto imread_end = system_clock::now();
-      model.predict(im_vec, results);
+      model.predict(im_vec, results, thread_num);
       auto imread_duration = duration_cast<microseconds>(imread_end - start);
       total_imread_time_s += double(imread_duration.count()) * microseconds::period::num / microseconds::period::den;
       auto end = system_clock::now();

+ 4 - 4
deploy/cpp/include/paddlex/paddlex.h

@@ -61,19 +61,19 @@ class Model {
 
   bool preprocess(const cv::Mat& input_im, ImageBlob* blob);
   
-  bool preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch);
+  bool preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch, int thread_num = 1);
 
   bool predict(const cv::Mat& im, ClsResult* result);
 
-  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results);
+  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results, int thread_num = 1);
 
   bool predict(const cv::Mat& im, DetResult* result);
 
-  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result);
+  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result, int thread_num = 1);
   
   bool predict(const cv::Mat& im, SegResult* result);
 
-  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result);
+  bool predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result, int thread_num = 1);
   
   std::string type;
   std::string name;
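
Each batch overload gains a trailing `int thread_num = 1` parameter. Because it is defaulted, existing call sites keep compiling unchanged while the updated demos can opt in; a minimal sketch of the pattern (names here are illustrative, not PaddleX API):

```cpp
#include <cstdio>
#include <vector>

// Before: void run(const std::vector<int>& batch);
// After: a trailing defaulted parameter keeps old callers source-compatible.
void run(const std::vector<int>& batch, int thread_num = 1) {
  std::printf("processing %zu items with %d threads\n", batch.size(), thread_num);
}

int main() {
  std::vector<int> batch(8);
  run(batch);     // old call site still compiles; thread_num defaults to 1
  run(batch, 4);  // new call site opts in to parallel preprocessing
  return 0;
}
```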

+ 11 - 9
deploy/cpp/src/paddlex.cpp

@@ -110,12 +110,13 @@ bool Model::preprocess(const cv::Mat& input_im, ImageBlob* blob) {
 }
 
 // Use OpenMP to parallelize batch preprocessing
-bool Model::preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch) {
+bool Model::preprocess(const std::vector<cv::Mat> &input_im_batch, std::vector<ImageBlob> &blob_batch, int thread_num) {
   int batch_size = input_im_batch.size();
   bool success = true;
   int max_h = -1;
   int max_w = -1;
-  #pragma omp parallel for num_threads(batch_size)
+  thread_num = std::min(thread_num, batch_size);
+  #pragma omp parallel for num_threads(thread_num)
   for(int i = 0; i < input_im_batch.size(); ++i) {
     cv::Mat im = input_im_batch[i].clone();
     if(!transforms_.Run(&im, &blob_batch[i])){
@@ -164,7 +165,7 @@ bool Model::predict(const cv::Mat& im, ClsResult* result) {
   return true;
 }
 
-bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results) {
+bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult> &results, int thread_num) {
   for(auto &inputs: inputs_batch_) {
     inputs.clear();
   }
@@ -180,7 +181,7 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<ClsResult>
     return false;
   }
   // Preprocess the input images
-  if (!preprocess(im_batch, inputs_batch_)) {
+  if (!preprocess(im_batch, inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
@@ -326,7 +327,7 @@ bool Model::predict(const cv::Mat& im, DetResult* result) {
   return true;
 }
 
-bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result) {
+bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult> &result, int thread_num) {
   if (type == "classifier") {
     std::cerr << "Loading model is a 'classifier', ClsResult should be passed "
                  "to function predict()!"
@@ -341,7 +342,7 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult>
 
   int batch_size = im_batch.size();
   // Preprocess the input images
-  if (!preprocess(im_batch, inputs_batch_)) {
+  if (!preprocess(im_batch, inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
@@ -357,7 +358,8 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<DetResult>
                   << ", " << inputs_batch_[i].new_im_size_[1] 
                   <<  ")" << std::endl;
       }
-      #pragma omp parallel for num_threads(batch_size)
+      thread_num = std::min(thread_num, batch_size);
+      #pragma omp parallel for num_threads(thread_num)
       for(int i = 0; i < batch_size; ++i) {
         int h = inputs_batch_[i].new_im_size_[0];
         int w = inputs_batch_[i].new_im_size_[1];
@@ -597,7 +599,7 @@ bool Model::predict(const cv::Mat& im, SegResult* result) {
   return true;
 }
 
-bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result) {
+bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult> &result, int thread_num) {
   for(auto &inputs: inputs_batch_) {
     inputs.clear();
   }
@@ -614,7 +616,7 @@ bool Model::predict(const std::vector<cv::Mat> &im_batch, std::vector<SegResult>
   }
 
   // Preprocess the input images
-  if (!preprocess(im_batch, inputs_batch_)) {
+  if (!preprocess(im_batch, inputs_batch_, thread_num)) {
     std::cerr << "Preprocess failed!" << std::endl;
     return false;
   }
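
With `Model::preprocess` clamping internally (`thread_num = std::min(thread_num, batch_size)`), callers can pass the flag value straight through. A hedged end-to-end sketch mirroring the classifier demo; the function name, include paths, and the assumption that OpenMP is enabled are mine, not part of the commit:

```cpp
#include <algorithm>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
#include "paddlex/paddlex.h"  // assumed include path for the Model class above

// Load a batch of images in parallel, then run batched prediction.
void classify_batch(PaddleX::Model& model,
                    const std::vector<std::string>& paths, int thread_num) {
  std::vector<cv::Mat> im_vec(paths.size());
  std::vector<PaddleX::ClsResult> results(paths.size(), PaddleX::ClsResult());
  int n = std::min(thread_num, static_cast<int>(paths.size()));
  #pragma omp parallel for num_threads(n)
  for (int j = 0; j < static_cast<int>(paths.size()); ++j) {
    im_vec[j] = cv::imread(paths[j], 1);  // parallel image loading, as in the demos
  }
  model.predict(im_vec, results, thread_num);  // preprocessing clamps again inside
}
```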