result.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #pragma once
  15. #include "opencv2/core/core.hpp"
  16. #include "ultra_infer/ultra_infer_model.h"
  17. #include <set>
  18. namespace ultra_infer {
  19. /** \brief All C++ UltraInfer Vision Models APIs are defined inside this
  20. * namespace
  21. *
  22. */
  23. namespace vision {
  24. enum ULTRAINFER_DECL ResultType {
  25. UNKNOWN_RESULT,
  26. CLASSIFY,
  27. DETECTION,
  28. SEGMENTATION,
  29. OCR,
  30. MOT,
  31. FACE_DETECTION,
  32. FACE_ALIGNMENT,
  33. FACE_RECOGNITION,
  34. MATTING,
  35. MASK,
  36. KEYPOINT_DETECTION,
  37. HEADPOSE,
  38. PERCEPTION,
  39. };
  40. struct ULTRAINFER_DECL BaseResult {
  41. ResultType type = ResultType::UNKNOWN_RESULT;
  42. };
  43. /*! @brief Classify result structure for all the image classify models
  44. */
  45. struct ULTRAINFER_DECL ClassifyResult : public BaseResult {
  46. ClassifyResult() = default;
  47. /// Classify result for an image
  48. std::vector<int32_t> label_ids;
  49. /// The confidence for each classify result
  50. std::vector<float> scores;
  51. /// The feature vector of recognizer, e.g, PP-ShiTuV2 Recognizer
  52. std::vector<float> feature;
  53. ResultType type = ResultType::CLASSIFY;
  54. /// Resize ClassifyResult data buffer
  55. void Resize(int size);
  56. /// Clear ClassifyResult
  57. void Clear();
  58. /// Clear ClassifyResult and free the memory
  59. void Free();
  60. /// Copy constructor
  61. ClassifyResult(const ClassifyResult &other) = default;
  62. /// Move assignment
  63. ClassifyResult &operator=(ClassifyResult &&other);
  64. /// Debug function, convert the result to string to print
  65. std::string Str();
  66. };
  67. /*! Mask structure, used in DetectionResult for instance segmentation models
  68. */
  69. struct ULTRAINFER_DECL Mask : public BaseResult {
  70. /// Mask data buffer
  71. std::vector<uint32_t> data;
  72. /// Shape of mask
  73. std::vector<int64_t> shape; // (H,W) ...
  74. ResultType type = ResultType::MASK;
  75. /// clear Mask result
  76. void Clear();
  77. /// Clear Mask result and free the memory
  78. void Free();
  79. /// Return a mutable pointer of the mask data buffer
  80. void *Data() { return data.data(); }
  81. /// Return a pointer of the mask data buffer for read only
  82. const void *Data() const { return data.data(); }
  83. /// Reserve size for mask data buffer
  84. void Reserve(int size);
  85. /// Resize the mask data buffer
  86. void Resize(int size);
  87. /// Debug function, convert the result to string to print
  88. std::string Str();
  89. };
  90. /*! @brief Detection result structure for all the object detection models and
  91. * instance segmentation models
  92. */
  93. struct ULTRAINFER_DECL DetectionResult : public BaseResult {
  94. DetectionResult() = default;
  95. /** \brief All the detected object boxes for an input image, the size of
  96. * `boxes` is the number of detected objects, and the element of `boxes` is a
  97. * array of 4 float values, means [xmin, ymin, xmax, ymax]
  98. */
  99. std::vector<std::array<float, 4>> boxes;
  100. /** \brief All the detected rotated object boxes for an input image, the size
  101. * of `boxes` is the number of detected objects, and the element of
  102. * `rotated_boxes` is an array of 8 float values, means [x1, y1, x2, y2, x3,
  103. * y3, x4, y4]
  104. */
  105. std::vector<std::array<float, 8>> rotated_boxes;
  106. /** \brief The confidence for all the detected objects
  107. */
  108. std::vector<float> scores;
  109. /// The classify label for all the detected objects
  110. std::vector<int32_t> label_ids;
  111. /** \brief For instance segmentation model, `masks` is the predict mask for
  112. * all the detected objects
  113. */
  114. std::vector<Mask> masks;
  115. /// Shows if the DetectionResult has mask
  116. bool contain_masks = false;
  117. ResultType type = ResultType::DETECTION;
  118. /// Copy constructor
  119. DetectionResult(const DetectionResult &res);
  120. /// Move assignment
  121. DetectionResult &operator=(DetectionResult &&other);
  122. /// Clear DetectionResult
  123. void Clear();
  124. /// Clear DetectionResult and free the memory
  125. void Free();
  126. void Reserve(int size);
  127. void Resize(int size);
  128. /// Debug function, convert the result to string to print
  129. std::string Str();
  130. };
  131. /*! @brief Detection result structure for all the object detection models and
  132. * instance segmentation models
  133. */
  134. struct ULTRAINFER_DECL PerceptionResult : public BaseResult {
  135. PerceptionResult() = default;
  136. std::vector<float> scores;
  137. std::vector<int32_t> label_ids;
  138. // xmin, ymin, xmax, ymax, h, w, l
  139. std::vector<std::array<float, 7>> boxes;
  140. // cx, cy, cz
  141. std::vector<std::array<float, 3>> center;
  142. std::vector<float> observation_angle;
  143. std::vector<float> yaw_angle;
  144. // vx, vy, vz
  145. std::vector<std::array<float, 3>> velocity;
  146. // valid results for func Str(): True for printing
  147. // 0 scores
  148. // 1 label_ids
  149. // 2 boxes
  150. // 3 center
  151. // 4 observation_angle
  152. // 5 yaw_angle
  153. // 6 velocity
  154. std::vector<bool> valid;
  155. /// Copy constructor
  156. PerceptionResult(const PerceptionResult &res);
  157. /// Move assignment
  158. PerceptionResult &operator=(PerceptionResult &&other);
  159. /// Clear PerceptionResult
  160. void Clear();
  161. /// Clear PerceptionResult and free the memory
  162. void Free();
  163. void Reserve(int size);
  164. void Resize(int size);
  165. /// Debug function, convert the result to string to print
  166. std::string Str();
  167. };
  168. /*! @brief KeyPoint Detection result structure for all the keypoint detection
  169. * models
  170. */
  171. struct ULTRAINFER_DECL KeyPointDetectionResult : public BaseResult {
  172. /** \brief All the coordinates of detected keypoints for an input image, the
  173. * size of `keypoints` is num_detected_objects * num_joints, and the element
  174. * of `keypoint` is a array of 2 float values, means [x, y]
  175. */
  176. std::vector<std::array<float, 2>> keypoints;
  177. //// The confidence for all the detected points
  178. std::vector<float> scores;
  179. //// Number of joints for a detected object
  180. int num_joints = -1;
  181. ResultType type = ResultType::KEYPOINT_DETECTION;
  182. /// Clear KeyPointDetectionResult
  183. void Clear();
  184. /// Clear KeyPointDetectionResult and free the memory
  185. void Free();
  186. void Reserve(int size);
  187. void Resize(int size);
  188. /// Debug function, convert the result to string to print
  189. std::string Str();
  190. };
  191. struct ULTRAINFER_DECL OCRResult : public BaseResult {
  192. std::vector<std::array<int, 8>> boxes;
  193. std::vector<std::string> text;
  194. std::vector<float> rec_scores;
  195. std::vector<float> cls_scores;
  196. std::vector<int32_t> cls_labels;
  197. std::vector<std::array<int, 8>> table_boxes;
  198. std::vector<std::string> table_structure;
  199. std::string table_html;
  200. ResultType type = ResultType::OCR;
  201. void Clear();
  202. std::string Str();
  203. };
  204. struct ULTRAINFER_DECL OCRCURVEResult : public BaseResult {
  205. std::vector<std::vector<int>> boxes;
  206. std::vector<std::string> text;
  207. std::vector<float> rec_scores;
  208. std::vector<float> cls_scores;
  209. std::vector<int32_t> cls_labels;
  210. std::vector<std::array<int, 8>> table_boxes;
  211. std::vector<std::string> table_structure;
  212. std::string table_html;
  213. ResultType type = ResultType::OCR;
  214. void Clear();
  215. std::string Str();
  216. };
  217. /*! @brief MOT(Multi-Object Tracking) result structure for all the MOT models
  218. */
  219. struct ULTRAINFER_DECL MOTResult : public BaseResult {
  220. /** \brief All the tracking object boxes for an input image, the size of
  221. * `boxes` is the number of tracking objects, and the element of `boxes` is a
  222. * array of 4 float values, means [xmin, ymin, xmax, ymax]
  223. */
  224. std::vector<std::array<int, 4>> boxes;
  225. /** \brief All the tracking object ids
  226. */
  227. std::vector<int> ids;
  228. /** \brief The confidence for all the tracking objects
  229. */
  230. std::vector<float> scores;
  231. /** \brief The classify label id for all the tracking object
  232. */
  233. std::vector<int> class_ids;
  234. ResultType type = ResultType::MOT;
  235. /// Clear MOT result
  236. void Clear();
  237. /// Debug function, convert the result to string to print
  238. std::string Str();
  239. };
  240. /*! @brief Face detection result structure for all the face detection models
  241. */
  242. struct ULTRAINFER_DECL FaceDetectionResult : public BaseResult {
  243. /** \brief All the detected object boxes for an input image, the size of
  244. * `boxes` is the number of detected objects, and the element of `boxes` is a
  245. * array of 4 float values, means [xmin, ymin, xmax, ymax]
  246. */
  247. std::vector<std::array<float, 4>> boxes;
  248. /** \brief
  249. * If the model detect face with landmarks, every detected object box
  250. * correspoing to a landmark, which is a array of 2 float values, means
  251. * location [x,y]
  252. */
  253. std::vector<std::array<float, 2>> landmarks;
  254. /** \brief
  255. * Indicates the confidence of all targets detected from a single image, and
  256. * the number of elements is consistent with boxes.size()
  257. */
  258. std::vector<float> scores;
  259. ResultType type = ResultType::FACE_DETECTION;
  260. /** \brief
  261. * `landmarks_per_face` indicates the number of face landmarks for each
  262. * detected face if the model's output contains face landmarks (such as
  263. * YOLOv5Face, SCRFD, ...)
  264. */
  265. int landmarks_per_face;
  266. FaceDetectionResult() { landmarks_per_face = 0; }
  267. FaceDetectionResult(const FaceDetectionResult &res);
  268. /// Clear FaceDetectionResult
  269. void Clear();
  270. /// Clear FaceDetectionResult and free the memory
  271. void Free();
  272. void Reserve(int size);
  273. void Resize(int size);
  274. /// Debug function, convert the result to string to print
  275. std::string Str();
  276. };
  277. /*! @brief Face Alignment result structure for all the face alignment models
  278. */
  279. struct ULTRAINFER_DECL FaceAlignmentResult : public BaseResult {
  280. /** \brief All the coordinates of detected landmarks for an input image, and
  281. * the element of `landmarks` is a array of 2 float values, means [x, y]
  282. */
  283. std::vector<std::array<float, 2>> landmarks;
  284. ResultType type = ResultType::FACE_ALIGNMENT;
  285. /// Clear FaceAlignmentResult
  286. void Clear();
  287. /// Clear FaceAlignmentResult and free the memory
  288. void Free();
  289. void Reserve(int size);
  290. void Resize(int size);
  291. /// Debug function, convert the result to string to print
  292. std::string Str();
  293. };
  294. /*! @brief Segmentation result structure for all the segmentation models
  295. */
  296. struct ULTRAINFER_DECL SegmentationResult : public BaseResult {
  297. SegmentationResult() = default;
  298. /** \brief
  299. * `label_map` stores the pixel-level category labels for input image. the
  300. * number of pixels is equal to label_map.size()
  301. */
  302. std::vector<uint8_t> label_map;
  303. /** \brief
  304. * `score_map` stores the probability of the predicted label for each pixel of
  305. * input image.
  306. */
  307. std::vector<float> score_map;
  308. /// The output shape, means [H, W]
  309. std::vector<int64_t> shape;
  310. /// SegmentationResult whether containing score_map
  311. bool contain_score_map = false;
  312. /// Copy constructor
  313. SegmentationResult(const SegmentationResult &other) = default;
  314. /// Move assignment
  315. SegmentationResult &operator=(SegmentationResult &&other);
  316. ResultType type = ResultType::SEGMENTATION;
  317. /// Clear Segmentation result
  318. void Clear();
  319. /// Clear Segmentation result and free the memory
  320. void Free();
  321. void Reserve(int size);
  322. void Resize(int size);
  323. /// Debug function, convert the result to string to print
  324. std::string Str();
  325. };
  326. /*! @brief Face recognition result structure for all the Face recognition models
  327. */
  328. struct ULTRAINFER_DECL FaceRecognitionResult : public BaseResult {
  329. /** \brief The feature embedding that represents the final extraction of the
  330. * face recognition model can be used to calculate the feature similarity
  331. * between faces.
  332. */
  333. std::vector<float> embedding;
  334. ResultType type = ResultType::FACE_RECOGNITION;
  335. FaceRecognitionResult() {}
  336. FaceRecognitionResult(const FaceRecognitionResult &res);
  337. /// Clear FaceRecognitionResult
  338. void Clear();
  339. /// Clear FaceRecognitionResult and free the memory
  340. void Free();
  341. void Reserve(int size);
  342. void Resize(int size);
  343. /// Debug function, convert the result to string to print
  344. std::string Str();
  345. };
  346. /*! @brief Matting result structure for all the Matting models
  347. */
  348. struct ULTRAINFER_DECL MattingResult : public BaseResult {
  349. /** \brief
  350. `alpha` is a one-dimensional vector, which is the predicted alpha transparency
  351. value. The range of values is [0., 1.], and the length is hxw. h, w are the
  352. height and width of the input image
  353. */
  354. std::vector<float> alpha; // h x w
  355. /** \brief
  356. If the model can predict foreground, `foreground` save the predicted
  357. foreground image, the shape is [height,width,channel] generally.
  358. */
  359. std::vector<float> foreground; // h x w x c (c=3 default)
  360. /** \brief
  361. * The shape of output result, when contain_foreground == false, shape only
  362. * contains (h, w), when contain_foreground == true, shape contains (h, w, c),
  363. * and c is generally 3
  364. */
  365. std::vector<int64_t> shape;
  366. /** \brief
  367. If the model can predict alpha matte and foreground, contain_foreground =
  368. true, default false
  369. */
  370. bool contain_foreground = false;
  371. ResultType type = ResultType::MATTING;
  372. MattingResult() {}
  373. MattingResult(const MattingResult &res);
  374. /// Clear matting result
  375. void Clear();
  376. /// Free matting result
  377. void Free();
  378. void Reserve(int size);
  379. void Resize(int size);
  380. /// Debug function, convert the result to string to print
  381. std::string Str();
  382. };
  383. /*! @brief HeadPose result structure for all the headpose models
  384. */
  385. struct ULTRAINFER_DECL HeadPoseResult : public BaseResult {
  386. /** \brief EulerAngles for an input image, and the element of `euler_angles`
  387. * is a vector, contains {yaw, pitch, roll}
  388. */
  389. std::vector<float> euler_angles;
  390. ResultType type = ResultType::HEADPOSE;
  391. /// Clear HeadPoseResult
  392. void Clear();
  393. /// Clear HeadPoseResult and free the memory
  394. void Free();
  395. void Reserve(int size);
  396. void Resize(int size);
  397. /// Debug function, convert the result to string to print
  398. std::string Str();
  399. };
  400. } // namespace vision
  401. } // namespace ultra_infer