postprocessor.cc

// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "ultra_infer/vision/detection/ppdet/postprocessor.h"

#include <algorithm>
#include <cmath>
#include <cstring>

#include <opencv2/core.hpp>

#include "ultra_infer/vision/utils/utils.h"
#include "yaml-cpp/yaml.h"

namespace ultra_infer {
namespace vision {
namespace detection {
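
// Crop each detected box's segmentation mask out of the model's mask tensor
// and attach it to the corresponding DetectionResult. The tensor is expected
// to hold one [mask_h, mask_w] plane per kept box; only the region inside
// the (rounded) box coordinates is copied.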
bool PaddleDetPostprocessor::ProcessMask(
    const FDTensor &tensor, std::vector<DetectionResult> *results) {
  auto shape = tensor.Shape();
  int64_t out_mask_w = shape[2];
  int64_t out_mask_numel = shape[1] * shape[2];
  const auto *data = reinterpret_cast<const uint32_t *>(tensor.CpuData());
  int index = 0;
  for (size_t i = 0; i < results->size(); ++i) {
    (*results)[i].contain_masks = true;
    (*results)[i].masks.resize((*results)[i].boxes.size());
    for (size_t j = 0; j < (*results)[i].boxes.size(); ++j) {
      int x1 = static_cast<int>(round((*results)[i].boxes[j][0]));
      int y1 = static_cast<int>(round((*results)[i].boxes[j][1]));
      int x2 = static_cast<int>(round((*results)[i].boxes[j][2]));
      int y2 = static_cast<int>(round((*results)[i].boxes[j][3]));
      int keep_mask_h = y2 - y1;
      int keep_mask_w = x2 - x1;
      int keep_mask_numel = keep_mask_h * keep_mask_w;
      (*results)[i].masks[j].Resize(keep_mask_numel);
      (*results)[i].masks[j].shape = {keep_mask_h, keep_mask_w};
      const uint32_t *current_ptr = data + index * out_mask_numel;
      auto *keep_mask_ptr =
          reinterpret_cast<uint32_t *>((*results)[i].masks[j].Data());
      // Copy the box region out of the full-size mask plane, row by row
      for (int row = y1; row < y2; ++row) {
        size_t keep_nbytes_per_row = keep_mask_w * sizeof(uint32_t);
        const uint32_t *out_row_start_ptr = current_ptr + row * out_mask_w + x1;
        uint32_t *keep_row_start_ptr = keep_mask_ptr + (row - y1) * keep_mask_w;
        std::memcpy(keep_row_start_ptr, out_row_start_ptr, keep_nbytes_per_row);
      }
      index += 1;
    }
  }
  return true;
}
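
// Decode the outputs of models exported with NMS fused into the graph.
// tensors[0] holds the kept boxes as rows of six floats
// [label, score, x1, y1, x2, y2]; tensors[1] holds the number of kept boxes
// per input image (INT32 or INT64).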
bool PaddleDetPostprocessor::ProcessWithNMS(
    const std::vector<FDTensor> &tensors,
    std::vector<DetectionResult> *results) {
  // Get the number of boxes kept for each input image
  std::vector<int> num_boxes(tensors[1].shape[0]);
  int total_num_boxes = 0;
  if (tensors[1].dtype == FDDataType::INT32) {
    const auto *data = static_cast<const int32_t *>(tensors[1].CpuData());
    for (size_t i = 0; i < num_boxes.size(); ++i) {
      num_boxes[i] = static_cast<int>(data[i]);
      total_num_boxes += num_boxes[i];
    }
  } else if (tensors[1].dtype == FDDataType::INT64) {
    const auto *data = static_cast<const int64_t *>(tensors[1].CpuData());
    for (size_t i = 0; i < num_boxes.size(); ++i) {
      num_boxes[i] = static_cast<int>(data[i]);
      total_num_boxes += num_boxes[i];
    }
  }
  // Special case for TensorRT: its NMS has a fixed output shape, so its
  // output may be padded with invalid boxes.
  int num_output_boxes = static_cast<int>(tensors[0].Shape()[0]);
  bool contain_invalid_boxes = false;
  if (total_num_boxes != num_output_boxes) {
    if (num_output_boxes % num_boxes.size() == 0) {
      contain_invalid_boxes = true;
    } else {
      FDERROR << "Cannot handle the output data for this model, unexpected "
                 "situation."
              << std::endl;
      return false;
    }
  }
  // Collect the boxes for each input image
  results->resize(num_boxes.size());
  if (tensors[0].shape[0] == 0) {
    // No boxes detected
    return true;
  }
  const auto *box_data = static_cast<const float *>(tensors[0].CpuData());
  int offset = 0;
  for (size_t i = 0; i < num_boxes.size(); ++i) {
    const float *ptr = box_data + offset;
    (*results)[i].Reserve(num_boxes[i]);
    for (int j = 0; j < num_boxes[i]; ++j) {
      // Each box is six floats: [label, score, x1, y1, x2, y2]
      (*results)[i].label_ids.push_back(
          static_cast<int32_t>(round(ptr[j * 6])));
      (*results)[i].scores.push_back(ptr[j * 6 + 1]);
      (*results)[i].boxes.emplace_back(std::array<float, 4>(
          {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]}));
    }
    if (contain_invalid_boxes) {
      // Padded output: every image occupies an equal, fixed-size slice
      offset += static_cast<int>(num_output_boxes * 6 / num_boxes.size());
    } else {
      offset += num_boxes[i] * 6;
    }
  }
  return true;
}
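
// Decode the outputs of models exported without NMS: run multiclass NMS on
// the raw predictions here, then map the boxes back to the original image
// scale. One input tensor holds boxes [batch, boxes_num, 4] and the other
// holds scores [batch, classes_num, boxes_num], in either order.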
bool PaddleDetPostprocessor::ProcessWithoutNMS(
    const std::vector<FDTensor> &tensors,
    std::vector<DetectionResult> *results) {
  int boxes_index = 0;
  int scores_index = 1;
  // Determine which tensor holds the boxes and which holds the scores
  if (tensors[0].shape[1] == tensors[1].shape[2]) {
    boxes_index = 0;
    scores_index = 1;
  } else if (tensors[0].shape[2] == tensors[1].shape[1]) {
    boxes_index = 1;
    scores_index = 0;
  } else {
    FDERROR << "The shape of boxes and scores should be [batch, boxes_num, "
               "4], [batch, classes_num, boxes_num]"
            << std::endl;
    return false;
  }
  // Run multiclass NMS
  multi_class_nms_.Compute(
      static_cast<const float *>(tensors[boxes_index].Data()),
      static_cast<const float *>(tensors[scores_index].Data()),
      tensors[boxes_index].shape, tensors[scores_index].shape);
  auto num_boxes = multi_class_nms_.out_num_rois_data;
  auto box_data =
      static_cast<const float *>(multi_class_nms_.out_box_data.data());
  // Collect the boxes for each input image
  results->resize(num_boxes.size());
  int offset = 0;
  for (size_t i = 0; i < num_boxes.size(); ++i) {
    const float *ptr = box_data + offset;
    (*results)[i].Reserve(num_boxes[i]);
    for (int j = 0; j < num_boxes[i]; ++j) {
      (*results)[i].label_ids.push_back(
          static_cast<int32_t>(round(ptr[j * 6])));
      (*results)[i].scores.push_back(ptr[j * 6 + 1]);
      (*results)[i].boxes.emplace_back(std::array<float, 4>(
          {ptr[j * 6 + 2], ptr[j * 6 + 3], ptr[j * 6 + 4], ptr[j * 6 + 5]}));
    }
    offset += num_boxes[i] * 6;
  }
  // Map the boxes back to the original image scale
  auto scale_factor = GetScaleFactor();
  if (scale_factor[0] != 0) {
    for (auto &result : *results) {
      for (auto &box : result.boxes) {
        // scale_factor is [scale_h, scale_w]
        box[0] /= scale_factor[1];
        box[1] /= scale_factor[0];
        box[2] /= scale_factor[1];
        box[3] /= scale_factor[0];
      }
    }
  }
  return true;
}
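
// Decode SOLOv2 outputs. The four tensors are: [0] the number of detected
// instances, [1] the labels, [2] the scores, and [3] the full-image instance
// masks. A bounding box is derived for each instance from the first and last
// non-empty rows and columns of its mask.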
bool PaddleDetPostprocessor::ProcessSolov2(
    const std::vector<FDTensor> &tensors,
    std::vector<DetectionResult> *results) {
  if (tensors.size() != 4) {
    FDERROR << "The size of tensors for SOLOv2 must be 4." << std::endl;
    return false;
  }
  if (tensors[0].shape[0] != 1) {
    FDERROR << "SOLOv2 temporarily only supports batch size 1." << std::endl;
    return false;
  }
  results->clear();
  results->resize(1);
  (*results)[0].contain_masks = true;
  // tensors[0] holds the number of valid detections
  const auto bbox_data = static_cast<const int *>(tensors[0].CpuData());
  // tensors[1] holds the labels
  const auto label_data = static_cast<const int64_t *>(tensors[1].CpuData());
  // tensors[2] holds the scores
  const auto score_data = static_cast<const float *>(tensors[2].CpuData());
  // tensors[3] holds the masks, each with the same shape as the input image
  const auto mask_data = static_cast<const uint8_t *>(tensors[3].CpuData());
  int rows = static_cast<int>(tensors[3].shape[1]);
  int cols = static_cast<int>(tensors[3].shape[2]);
  for (int bbox_id = 0; bbox_id < bbox_data[0]; ++bbox_id) {
    if (score_data[bbox_id] >= multi_class_nms_.score_threshold) {
      DetectionResult &result_item = (*results)[0];
      result_item.label_ids.emplace_back(label_data[bbox_id]);
      result_item.scores.emplace_back(score_data[bbox_id]);
      std::vector<int> global_mask;
      global_mask.reserve(rows * cols);
      for (int k = 0; k < rows * cols; ++k) {
        global_mask.push_back(
            static_cast<int>(mask_data[k + bbox_id * rows * cols]));
      }
      // Find the minimal bounding box of the mask
      cv::Mat mask(rows, cols, CV_32SC1);
      std::memcpy(mask.data, global_mask.data(),
                  global_mask.size() * sizeof(int));
      cv::Mat mask_fp;
      mask.convertTo(mask_fp, CV_32FC1);
      cv::Mat rowSum;
      cv::Mat colSum;
      std::vector<float> sum_of_row(rows);
      std::vector<float> sum_of_col(cols);
      cv::reduce(mask_fp, colSum, 0, cv::REDUCE_SUM, CV_32FC1);
      cv::reduce(mask_fp, rowSum, 1, cv::REDUCE_SUM, CV_32FC1);
      for (int row_id = 0; row_id < rows; ++row_id) {
        sum_of_row[row_id] = rowSum.at<float>(row_id, 0);
      }
      for (int col_id = 0; col_id < cols; ++col_id) {
        sum_of_col[col_id] = colSum.at<float>(0, col_id);
      }
      // The box spans the first through last rows/columns whose pixel sums
      // are non-zero
      auto it = std::find_if(sum_of_row.begin(), sum_of_row.end(),
                             [](float x) { return x > 0.5f; });
      float y1 = std::distance(sum_of_row.begin(), it);
      auto it2 = std::find_if(sum_of_col.begin(), sum_of_col.end(),
                              [](float x) { return x > 0.5f; });
      float x1 = std::distance(sum_of_col.begin(), it2);
      auto rit = std::find_if(sum_of_row.rbegin(), sum_of_row.rend(),
                              [](float x) { return x > 0.5f; });
      float y2 = std::distance(rit, sum_of_row.rend());
      auto rit2 = std::find_if(sum_of_col.rbegin(), sum_of_col.rend(),
                               [](float x) { return x > 0.5f; });
      float x2 = std::distance(rit2, sum_of_col.rend());
      result_item.boxes.emplace_back(std::array<float, 4>({x1, y1, x2, y2}));
    }
  }
  return true;
}
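
// Decode PP-YOLOE-R outputs: run rotated multiclass NMS on the raw
// boxes/scores, then map the boxes back to the original image scale. Each
// kept box is ten floats: [label, score, x1, y1, x2, y2, x3, y3, x4, y4].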
bool PaddleDetPostprocessor::ProcessPPYOLOER(
    const std::vector<FDTensor> &tensors,
    std::vector<DetectionResult> *results) {
  if (tensors.size() != 2) {
    FDERROR << "The size of tensors for PPYOLOER must be 2." << std::endl;
    return false;
  }
  int boxes_index = 0;
  int scores_index = 1;
  multi_class_nms_rotated_.Compute(
      static_cast<const float *>(tensors[boxes_index].Data()),
      static_cast<const float *>(tensors[scores_index].Data()),
      tensors[boxes_index].shape, tensors[scores_index].shape);
  auto num_boxes = multi_class_nms_rotated_.out_num_rois_data;
  auto box_data =
      static_cast<const float *>(multi_class_nms_rotated_.out_box_data.data());
  // Collect the boxes for each input image
  results->resize(num_boxes.size());
  int offset = 0;
  for (size_t i = 0; i < num_boxes.size(); ++i) {
    const float *ptr = box_data + offset;
    (*results)[i].Reserve(num_boxes[i]);
    for (int j = 0; j < num_boxes[i]; ++j) {
      (*results)[i].label_ids.push_back(
          static_cast<int32_t>(round(ptr[j * 10])));
      (*results)[i].scores.push_back(ptr[j * 10 + 1]);
      (*results)[i].rotated_boxes.push_back(std::array<float, 8>(
          {ptr[j * 10 + 2], ptr[j * 10 + 3], ptr[j * 10 + 4], ptr[j * 10 + 5],
           ptr[j * 10 + 6], ptr[j * 10 + 7], ptr[j * 10 + 8],
           ptr[j * 10 + 9]}));
    }
    offset += num_boxes[i] * 10;
  }
  // Map the boxes back to the original image scale
  auto scale_factor = GetScaleFactor();
  if (scale_factor[0] != 0) {
    for (auto &result : *results) {
      for (size_t i = 0; i < result.rotated_boxes.size(); ++i) {
        for (int j = 0; j < 8; ++j) {
          // Even indices are x coordinates, odd indices are y coordinates
          auto scale = j % 2 == 0 ? scale_factor[1] : scale_factor[0];
          result.rotated_boxes[i][j] /= static_cast<float>(scale);
        }
      }
    }
  }
  return true;
}
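
// Entry point of the postprocessor: dispatches to the matching decode routine
// based on the model architecture and the layout of the output tensors.
// A minimal usage sketch (illustrative only; `postprocessor` and
// `infer_results` stand in for objects supplied by the model wrapper):
//
//   std::vector<DetectionResult> results;
//   if (!postprocessor.Run(infer_results, &results)) {
//     FDERROR << "Failed to postprocess the inference results." << std::endl;
//   }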
bool PaddleDetPostprocessor::Run(const std::vector<FDTensor> &tensors,
                                 std::vector<DetectionResult> *results) {
  if (arch_ == "SOLOv2") {
    // Process SOLOv2
    if (!ProcessSolov2(tensors, results)) {
      return false;
    }
    // The fourth output of SOLOv2 holds the masks
    return ProcessMask(tensors[3], results);
  } else {
    // PP-YOLOE-R outputs rotated boxes of eight coordinates
    if (tensors[0].Shape().size() == 3 && tensors[0].Shape()[2] == 8) {
      return ProcessPPYOLOER(tensors, results);
    }
    // Process according to whether the model has NMS fused in
    if (with_nms_) {
      if (!ProcessWithNMS(tensors, results)) {
        return false;
      }
    } else {
      if (!ProcessWithoutNMS(tensors, results)) {
        return false;
      }
    }
    // Detection only, no masks to process
    if (tensors.size() <= 2) {
      return true;
    }
    // Mask R-CNN: validate the mask tensor against the boxes tensor
    if (tensors[2].Shape()[0] != tensors[0].Shape()[0]) {
      FDERROR << "The first dimension of output mask tensor:"
              << tensors[2].Shape()[0]
              << " is not equal to the first dimension of output boxes tensor:"
              << tensors[0].Shape()[0] << "." << std::endl;
      return false;
    }
    // The third output of Mask R-CNN holds the masks
    return ProcessMask(tensors[2], results);
  }
}
} // namespace detection
} // namespace vision
} // namespace ultra_infer