configure_hardware.cc 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "ultra_infer/runtime/backends/lite/lite_backend.h"
  15. #include <cstring>
  16. namespace ultra_infer {
  17. #if defined(__arm__) || defined(__aarch64__)
  18. #define FD_LITE_HOST TARGET(kARM)
  19. #elif defined(__x86_64__)
  20. #define FD_LITE_HOST TARGET(kX86)
  21. #endif
  22. std::vector<paddle::lite_api::Place>
  23. GetPlacesForCpu(const LiteBackendOption &option) {
  24. std::vector<paddle::lite_api::Place> valid_places;
  25. valid_places.push_back(
  26. paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kInt8)});
  27. if (option.enable_fp16) {
  28. paddle::lite_api::MobileConfig check_fp16_config;
  29. if (check_fp16_config.check_fp16_valid()) {
  30. valid_places.push_back(
  31. paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFP16)});
  32. } else {
  33. FDWARNING << "Current CPU doesn't support float16 precision, will "
  34. "fallback to float32."
  35. << std::endl;
  36. }
  37. }
  38. valid_places.push_back(
  39. paddle::lite_api::Place{FD_LITE_HOST, PRECISION(kFloat)});
  40. return valid_places;
  41. }
  42. void LiteBackend::ConfigureCpu(const LiteBackendOption &option) {
  43. config_.set_valid_places(GetPlacesForCpu(option));
  44. }
  45. void LiteBackend::ConfigureGpu(const LiteBackendOption &option) {
  46. std::vector<paddle::lite_api::Place> valid_places;
  47. if (option.enable_fp16) {
  48. valid_places.emplace_back(paddle::lite_api::Place{
  49. TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)});
  50. valid_places.emplace_back(paddle::lite_api::Place{
  51. TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageFolder)});
  52. }
  53. valid_places.emplace_back(
  54. paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kFloat)});
  55. valid_places.emplace_back(paddle::lite_api::Place{
  56. TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)});
  57. valid_places.emplace_back(paddle::lite_api::Place{
  58. TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageFolder)});
  59. valid_places.emplace_back(
  60. paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kAny)});
  61. valid_places.emplace_back(
  62. paddle::lite_api::Place{TARGET(kOpenCL), PRECISION(kInt32)});
  63. valid_places.emplace_back(
  64. paddle::lite_api::Place{TARGET(kARM), PRECISION(kInt8)});
  65. valid_places.emplace_back(
  66. paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)});
  67. config_.set_valid_places(valid_places);
  68. }
  69. void LiteBackend::ConfigureKunlunXin(const LiteBackendOption &option) {
  70. std::vector<paddle::lite_api::Place> valid_places;
  71. // TODO(yeliang): Placing kInt8 first may cause accuracy issues of some model
  72. // valid_places.push_back(
  73. // paddle::lite_api::Place{TARGET(kXPU), PRECISION(kInt8)});
  74. if (option.enable_fp16) {
  75. valid_places.push_back(
  76. paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFP16)});
  77. }
  78. valid_places.push_back(
  79. paddle::lite_api::Place{TARGET(kXPU), PRECISION(kFloat)});
  80. config_.set_xpu_dev_per_thread(option.device_id);
  81. config_.set_xpu_workspace_l3_size_per_thread(
  82. option.kunlunxin_l3_workspace_size);
  83. config_.set_xpu_l3_cache_method(option.kunlunxin_l3_workspace_size,
  84. option.kunlunxin_locked);
  85. config_.set_xpu_l3_cache_autotune(option.kunlunxin_autotune);
  86. config_.set_xpu_conv_autotune(option.kunlunxin_autotune,
  87. option.kunlunxin_autotune_file);
  88. config_.set_xpu_multi_encoder_method(option.kunlunxin_precision,
  89. option.kunlunxin_adaptive_seqlen);
  90. config_.set_xpu_gm_workspace_method(option.kunlunxin_gm_default_size);
  91. if (option.kunlunxin_enable_multi_stream) {
  92. config_.enable_xpu_multi_stream();
  93. }
  94. auto cpu_places = GetPlacesForCpu(option);
  95. valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
  96. config_.set_valid_places(valid_places);
  97. }
  98. void LiteBackend::ConfigureTimvx(const LiteBackendOption &option) {
  99. config_.set_nnadapter_device_names({"verisilicon_timvx"});
  100. std::vector<paddle::lite_api::Place> valid_places;
  101. valid_places.push_back(
  102. paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
  103. valid_places.push_back(
  104. paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
  105. auto cpu_places = GetPlacesForCpu(option);
  106. valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
  107. config_.set_valid_places(valid_places);
  108. ConfigureNNAdapter(option);
  109. }
  110. void LiteBackend::ConfigureAscend(const LiteBackendOption &option) {
  111. config_.set_nnadapter_device_names({"huawei_ascend_npu"});
  112. std::vector<paddle::lite_api::Place> valid_places;
  113. valid_places.push_back(
  114. paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kInt8)});
  115. valid_places.push_back(
  116. paddle::lite_api::Place{TARGET(kNNAdapter), PRECISION(kFloat)});
  117. auto cpu_places = GetPlacesForCpu(option);
  118. valid_places.insert(valid_places.end(), cpu_places.begin(), cpu_places.end());
  119. config_.set_valid_places(valid_places);
  120. ConfigureNNAdapter(option);
  121. }
  122. void LiteBackend::ConfigureNNAdapter(const LiteBackendOption &option) {
  123. if (!option.nnadapter_subgraph_partition_config_path.empty()) {
  124. std::vector<char> nnadapter_subgraph_partition_config_buffer;
  125. if (ReadFile(option.nnadapter_subgraph_partition_config_path,
  126. &nnadapter_subgraph_partition_config_buffer, false)) {
  127. if (!nnadapter_subgraph_partition_config_buffer.empty()) {
  128. std::string nnadapter_subgraph_partition_config_string(
  129. nnadapter_subgraph_partition_config_buffer.data(),
  130. nnadapter_subgraph_partition_config_buffer.size());
  131. config_.set_nnadapter_subgraph_partition_config_buffer(
  132. nnadapter_subgraph_partition_config_string);
  133. }
  134. }
  135. }
  136. if (!option.nnadapter_context_properties.empty()) {
  137. config_.set_nnadapter_context_properties(
  138. option.nnadapter_context_properties);
  139. }
  140. if (!option.nnadapter_model_cache_dir.empty()) {
  141. config_.set_nnadapter_model_cache_dir(option.nnadapter_model_cache_dir);
  142. }
  143. if (!option.nnadapter_mixed_precision_quantization_config_path.empty()) {
  144. config_.set_nnadapter_mixed_precision_quantization_config_path(
  145. option.nnadapter_mixed_precision_quantization_config_path);
  146. }
  147. if (!option.nnadapter_subgraph_partition_config_path.empty()) {
  148. config_.set_nnadapter_subgraph_partition_config_path(
  149. option.nnadapter_subgraph_partition_config_path);
  150. }
  151. config_.set_nnadapter_dynamic_shape_info(option.nnadapter_dynamic_shape_info);
  152. }
  153. } // namespace ultra_infer