Browse Source

add cpu mkl support

syyxsxx 5 years ago
parent
commit
9cc2b11901

+ 4 - 1
deploy/cpp/demo/classifier.cpp

@@ -29,6 +29,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -56,8 +57,10 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
 
   // Predict
   int imgs = 1;

+ 4 - 1
deploy/cpp/demo/detector.cpp

@@ -31,6 +31,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -61,8 +62,10 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
   int imgs = 1;
   std::string save_dir = "output";
   // Predict

+ 4 - 1
deploy/cpp/demo/segmenter.cpp

@@ -30,6 +30,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
 DEFINE_string(image, "", "Path of test image file");
@@ -58,8 +59,10 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
   int imgs = 1;
   // Predict
   if (FLAGS_image_list != "") {

+ 7 - 1
deploy/cpp/demo/video_classifier.cpp

@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,8 +66,10 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
 
   // Open video
   cv::VideoCapture capture;

+ 7 - 1
deploy/cpp/demo/video_detector.cpp

@@ -35,6 +35,7 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
@@ -42,6 +43,9 @@ DEFINE_string(video_path, "", "Path of input video");
 DEFINE_bool(show_result, false, "show the result of each frame with a window");
 DEFINE_bool(save_result, true, "save the result of each frame to a video");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");
 DEFINE_string(save_dir, "output", "Path to save visualized image");
 DEFINE_double(threshold,
               0.5,
@@ -64,8 +68,10 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
   // Open video
   cv::VideoCapture capture;
   if (FLAGS_use_camera) {

+ 7 - 1
deploy/cpp/demo/video_segmenter.cpp

@@ -35,8 +35,12 @@ using namespace std::chrono;  // NOLINT
 DEFINE_string(model_dir, "", "Path of inference model");
 DEFINE_bool(use_gpu, false, "Infering with GPU or CPU");
 DEFINE_bool(use_trt, false, "Infering with TensorRT");
+DEFINE_bool(use_mkl, true, "Infering with MKL");
 DEFINE_int32(gpu_id, 0, "GPU card id");
 DEFINE_string(key, "", "key of encryption");
+DEFINE_int32(thread_num,
+             omp_get_num_procs(),
+             "Number of preprocessing threads");
 DEFINE_bool(use_camera, false, "Infering with Camera");
 DEFINE_int32(camera_id, 0, "Camera id");
 DEFINE_string(video_path, "", "Path of input video");
@@ -62,8 +66,10 @@ int main(int argc, char** argv) {
   model.Init(FLAGS_model_dir,
              FLAGS_use_gpu,
              FLAGS_use_trt,
+             FLAGS_use_mkl,
              FLAGS_gpu_id,
-             FLAGS_key);
+             FLAGS_key,
+             FLAGS_thread_num);
   // Open video
   cv::VideoCapture capture;
   if (FLAGS_use_camera) {

+ 13 - 1
deploy/cpp/include/paddlex/paddlex.h

@@ -77,17 +77,29 @@ class Model {
   void Init(const std::string& model_dir,
             bool use_gpu = false,
             bool use_trt = false,
+            bool use_mkl = true,
             int gpu_id = 0,
             std::string key = "",
+            int thread_num = 1,
             bool use_ir_optim = true) {
-    create_predictor(model_dir, use_gpu, use_trt, gpu_id, key, use_ir_optim);
+    create_predictor(
+                     model_dir,
+                     use_gpu,
+                     use_trt,
+                     use_mkl,
+                     gpu_id,
+                     key,
+                     thread_num,
+                     use_ir_optim);
   }
 
   void create_predictor(const std::string& model_dir,
                         bool use_gpu = false,
                         bool use_trt = false,
+                        bool use_mkl = true,
                         int gpu_id = 0,
                         std::string key = "",
+                        int thread_num = 1,
                         bool use_ir_optim = true);
 
   /*

+ 6 - 0
deploy/cpp/src/paddlex.cpp

@@ -21,8 +21,10 @@ namespace PaddleX {
 void Model::create_predictor(const std::string& model_dir,
                              bool use_gpu,
                              bool use_trt,
+                             bool use_mkl,
                              int gpu_id,
                              std::string key,
+                             int thread_num,
                              bool use_ir_optim) {
   paddle::AnalysisConfig config;
   std::string model_file = model_dir + OS_PATH_SEP + "__model__";
@@ -57,6 +59,10 @@ void Model::create_predictor(const std::string& model_dir,
   if (key == "") {
     config.SetModel(model_file, params_file);
   }
+  if (use_mkl && name != "HRNet" && name != "DeepLabv3p") {
+    config.EnableMKLDNN();
+    config.SetCpuMathLibraryNumThreads(thread_num);
+  }
   if (use_gpu) {
     config.EnableUseGpu(100, gpu_id);
   } else {