vision_pybind.cc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "ultra_infer/pybind/main.h"
  15. namespace ultra_infer {
  16. void BindFDMat(pybind11::module &m);
  17. void BindFDMatBatch(pybind11::module &m);
  18. void BindProcessors(pybind11::module &m);
  19. void BindDetection(pybind11::module &m);
  20. void BindClassification(pybind11::module &m);
  21. void BindSegmentation(pybind11::module &m);
  22. void BindMatting(pybind11::module &m);
  23. void BindFaceDet(pybind11::module &m);
  24. void BindFaceAlign(pybind11::module &m);
  25. void BindFaceId(pybind11::module &m);
  26. void BindOcr(pybind11::module &m);
  27. void BindTracking(pybind11::module &m);
  28. void BindKeyPointDetection(pybind11::module &m);
  29. void BindHeadPose(pybind11::module &m);
  30. void BindSR(pybind11::module &m);
  31. void BindGeneration(pybind11::module &m);
  32. void BindVisualize(pybind11::module &m);
  33. void BindPerception(pybind11::module &m);
  34. void BindVision(pybind11::module &m) {
  35. pybind11::class_<vision::Mask>(m, "Mask")
  36. .def(pybind11::init())
  37. .def_readwrite("data", &vision::Mask::data)
  38. .def_readwrite("shape", &vision::Mask::shape)
  39. .def(pybind11::pickle(
  40. [](const vision::Mask &m) {
  41. return pybind11::make_tuple(m.data, m.shape);
  42. },
  43. [](pybind11::tuple t) {
  44. if (t.size() != 2)
  45. throw std::runtime_error(
  46. "vision::Mask pickle with invalid state!");
  47. vision::Mask m;
  48. m.data = t[0].cast<std::vector<uint32_t>>();
  49. m.shape = t[1].cast<std::vector<int64_t>>();
  50. return m;
  51. }))
  52. .def("__repr__", &vision::Mask::Str)
  53. .def("__str__", &vision::Mask::Str);
  54. pybind11::class_<vision::ClassifyResult>(m, "ClassifyResult")
  55. .def(pybind11::init())
  56. .def_readwrite("label_ids", &vision::ClassifyResult::label_ids)
  57. .def_readwrite("scores", &vision::ClassifyResult::scores)
  58. .def_readwrite("feature", &vision::ClassifyResult::feature)
  59. .def(pybind11::pickle(
  60. [](const vision::ClassifyResult &c) {
  61. if (c.feature.empty()) {
  62. return pybind11::make_tuple(c.label_ids, c.scores);
  63. }
  64. return pybind11::make_tuple(c.label_ids, c.scores, c.feature);
  65. },
  66. [](pybind11::tuple t) {
  67. if ((t.size() != 2) && (t.size() != 3)) {
  68. throw std::runtime_error(
  69. "vision::ClassifyResult pickle with invalid state!");
  70. }
  71. vision::ClassifyResult c;
  72. c.label_ids = t[0].cast<std::vector<int32_t>>();
  73. c.scores = t[1].cast<std::vector<float>>();
  74. if (t.size() == 3) {
  75. c.feature = t[2].cast<std::vector<float>>();
  76. }
  77. return c;
  78. }))
  79. .def("__repr__", &vision::ClassifyResult::Str)
  80. .def("__str__", &vision::ClassifyResult::Str);
  81. pybind11::class_<vision::DetectionResult>(m, "DetectionResult")
  82. .def(pybind11::init())
  83. .def_readwrite("boxes", &vision::DetectionResult::boxes)
  84. .def_readwrite("scores", &vision::DetectionResult::scores)
  85. .def_readwrite("rotated_boxes", &vision::DetectionResult::rotated_boxes)
  86. .def_readwrite("label_ids", &vision::DetectionResult::label_ids)
  87. .def_readwrite("masks", &vision::DetectionResult::masks)
  88. .def_readwrite("contain_masks", &vision::DetectionResult::contain_masks)
  89. .def(pybind11::pickle(
  90. [](const vision::DetectionResult &d) {
  91. return pybind11::make_tuple(d.boxes, d.scores, d.rotated_boxes,
  92. d.label_ids, d.masks, d.contain_masks);
  93. },
  94. [](pybind11::tuple t) {
  95. if (t.size() != 5)
  96. throw std::runtime_error(
  97. "vision::DetectionResult pickle with Invalid state!");
  98. vision::DetectionResult d;
  99. d.boxes = t[0].cast<std::vector<std::array<float, 4>>>();
  100. d.rotated_boxes = t[0].cast<std::vector<std::array<float, 8>>>();
  101. d.scores = t[1].cast<std::vector<float>>();
  102. d.label_ids = t[2].cast<std::vector<int32_t>>();
  103. d.masks = t[3].cast<std::vector<vision::Mask>>();
  104. d.contain_masks = t[4].cast<bool>();
  105. return d;
  106. }))
  107. .def("__repr__", &vision::DetectionResult::Str)
  108. .def("__str__", &vision::DetectionResult::Str);
  109. pybind11::class_<vision::PerceptionResult>(m, "PerceptionResult")
  110. .def(pybind11::init())
  111. .def_readwrite("valid", &vision::PerceptionResult::valid)
  112. .def_readwrite("scores", &vision::PerceptionResult::scores)
  113. .def_readwrite("label_ids", &vision::PerceptionResult::label_ids)
  114. .def_readwrite("boxes", &vision::PerceptionResult::boxes)
  115. .def_readwrite("center", &vision::PerceptionResult::center)
  116. .def_readwrite("observation_angle",
  117. &vision::PerceptionResult::observation_angle)
  118. .def_readwrite("yaw_angle", &vision::PerceptionResult::yaw_angle)
  119. .def_readwrite("velocity", &vision::PerceptionResult::velocity)
  120. .def(pybind11::pickle(
  121. [](const vision::PerceptionResult &d) {
  122. return pybind11::make_tuple(d.scores, d.label_ids, d.boxes,
  123. d.center, d.observation_angle,
  124. d.yaw_angle, d.velocity);
  125. },
  126. [](pybind11::tuple t) {
  127. if (t.size() != 7)
  128. throw std::runtime_error(
  129. "vision::PerceptionResult pickle with Invalid state!");
  130. vision::PerceptionResult d;
  131. d.scores = t[0].cast<std::vector<float>>();
  132. d.label_ids = t[1].cast<std::vector<int32_t>>();
  133. d.boxes = t[2].cast<std::vector<std::array<float, 7>>>();
  134. d.center = t[3].cast<std::vector<std::array<float, 3>>>();
  135. d.observation_angle = t[4].cast<std::vector<float>>();
  136. d.yaw_angle = t[5].cast<std::vector<float>>();
  137. d.velocity = t[6].cast<std::vector<std::array<float, 3>>>();
  138. return d;
  139. }))
  140. .def("__repr__", &vision::PerceptionResult::Str)
  141. .def("__str__", &vision::PerceptionResult::Str);
  142. pybind11::class_<vision::OCRResult>(m, "OCRResult")
  143. .def(pybind11::init())
  144. .def_readwrite("boxes", &vision::OCRResult::boxes)
  145. .def_readwrite("text", &vision::OCRResult::text)
  146. .def_readwrite("rec_scores", &vision::OCRResult::rec_scores)
  147. .def_readwrite("cls_scores", &vision::OCRResult::cls_scores)
  148. .def_readwrite("cls_labels", &vision::OCRResult::cls_labels)
  149. .def_readwrite("table_boxes", &vision::OCRResult::table_boxes)
  150. .def_readwrite("table_structure", &vision::OCRResult::table_structure)
  151. .def_readwrite("table_html", &vision::OCRResult::table_html)
  152. .def("__repr__", &vision::OCRResult::Str)
  153. .def("__str__", &vision::OCRResult::Str);
  154. pybind11::class_<vision::OCRCURVEResult>(m, "OCRCURVEResult")
  155. .def(pybind11::init())
  156. .def_readwrite("boxes", &vision::OCRCURVEResult::boxes)
  157. .def_readwrite("text", &vision::OCRCURVEResult::text)
  158. .def_readwrite("rec_scores", &vision::OCRCURVEResult::rec_scores)
  159. .def_readwrite("cls_scores", &vision::OCRCURVEResult::cls_scores)
  160. .def_readwrite("cls_labels", &vision::OCRCURVEResult::cls_labels)
  161. .def_readwrite("table_boxes", &vision::OCRCURVEResult::table_boxes)
  162. .def_readwrite("table_structure",
  163. &vision::OCRCURVEResult::table_structure)
  164. .def_readwrite("table_html", &vision::OCRCURVEResult::table_html)
  165. .def("__repr__", &vision::OCRCURVEResult::Str)
  166. .def("__str__", &vision::OCRCURVEResult::Str);
  167. pybind11::class_<vision::MOTResult>(m, "MOTResult")
  168. .def(pybind11::init())
  169. .def_readwrite("boxes", &vision::MOTResult::boxes)
  170. .def_readwrite("ids", &vision::MOTResult::ids)
  171. .def_readwrite("scores", &vision::MOTResult::scores)
  172. .def_readwrite("class_ids", &vision::MOTResult::class_ids)
  173. .def("__repr__", &vision::MOTResult::Str)
  174. .def("__str__", &vision::MOTResult::Str);
  175. pybind11::class_<vision::FaceDetectionResult>(m, "FaceDetectionResult")
  176. .def(pybind11::init())
  177. .def_readwrite("boxes", &vision::FaceDetectionResult::boxes)
  178. .def_readwrite("scores", &vision::FaceDetectionResult::scores)
  179. .def_readwrite("landmarks", &vision::FaceDetectionResult::landmarks)
  180. .def_readwrite("landmarks_per_face",
  181. &vision::FaceDetectionResult::landmarks_per_face)
  182. .def("__repr__", &vision::FaceDetectionResult::Str)
  183. .def("__str__", &vision::FaceDetectionResult::Str);
  184. pybind11::class_<vision::FaceAlignmentResult>(m, "FaceAlignmentResult")
  185. .def(pybind11::init())
  186. .def_readwrite("landmarks", &vision::FaceAlignmentResult::landmarks)
  187. .def("__repr__", &vision::FaceAlignmentResult::Str)
  188. .def("__str__", &vision::FaceAlignmentResult::Str);
  189. pybind11::class_<vision::FaceRecognitionResult>(m, "FaceRecognitionResult")
  190. .def(pybind11::init())
  191. .def_readwrite("embedding", &vision::FaceRecognitionResult::embedding)
  192. .def("__repr__", &vision::FaceRecognitionResult::Str)
  193. .def("__str__", &vision::FaceRecognitionResult::Str);
  194. pybind11::class_<vision::SegmentationResult>(m, "SegmentationResult")
  195. .def(pybind11::init())
  196. .def_readwrite("label_map", &vision::SegmentationResult::label_map)
  197. .def_readwrite("score_map", &vision::SegmentationResult::score_map)
  198. .def_readwrite("shape", &vision::SegmentationResult::shape)
  199. .def_readwrite("contain_score_map",
  200. &vision::SegmentationResult::contain_score_map)
  201. .def(pybind11::pickle(
  202. [](const vision::SegmentationResult &s) {
  203. return pybind11::make_tuple(s.label_map, s.score_map, s.shape,
  204. s.contain_score_map);
  205. },
  206. [](pybind11::tuple t) {
  207. if (t.size() != 4)
  208. throw std::runtime_error(
  209. "vision::SegmentationResult pickle with Invalid state!");
  210. vision::SegmentationResult s;
  211. s.label_map = t[0].cast<std::vector<uint8_t>>();
  212. s.score_map = t[1].cast<std::vector<float>>();
  213. s.shape = t[2].cast<std::vector<int64_t>>();
  214. s.contain_score_map = t[3].cast<bool>();
  215. return s;
  216. }))
  217. .def("__repr__", &vision::SegmentationResult::Str)
  218. .def("__str__", &vision::SegmentationResult::Str);
  219. pybind11::class_<vision::MattingResult>(m, "MattingResult")
  220. .def(pybind11::init())
  221. .def_readwrite("alpha", &vision::MattingResult::alpha)
  222. .def_readwrite("foreground", &vision::MattingResult::foreground)
  223. .def_readwrite("shape", &vision::MattingResult::shape)
  224. .def_readwrite("contain_foreground",
  225. &vision::MattingResult::contain_foreground)
  226. .def("__repr__", &vision::MattingResult::Str)
  227. .def("__str__", &vision::MattingResult::Str);
  228. pybind11::class_<vision::KeyPointDetectionResult>(m,
  229. "KeyPointDetectionResult")
  230. .def(pybind11::init())
  231. .def_readwrite("keypoints", &vision::KeyPointDetectionResult::keypoints)
  232. .def_readwrite("scores", &vision::KeyPointDetectionResult::scores)
  233. .def_readwrite("num_joints", &vision::KeyPointDetectionResult::num_joints)
  234. .def("__repr__", &vision::KeyPointDetectionResult::Str)
  235. .def("__str__", &vision::KeyPointDetectionResult::Str);
  236. pybind11::class_<vision::HeadPoseResult>(m, "HeadPoseResult")
  237. .def(pybind11::init())
  238. .def_readwrite("euler_angles", &vision::HeadPoseResult::euler_angles)
  239. .def("__repr__", &vision::HeadPoseResult::Str)
  240. .def("__str__", &vision::HeadPoseResult::Str);
  241. m.def("enable_flycv", &vision::EnableFlyCV,
  242. "Enable image preprocessing by FlyCV.");
  243. m.def("disable_flycv", &vision::DisableFlyCV,
  244. "Disable image preprocessing by FlyCV, change to use OpenCV.");
  245. BindFDMat(m);
  246. BindFDMatBatch(m);
  247. BindProcessors(m);
  248. BindDetection(m);
  249. BindClassification(m);
  250. BindSegmentation(m);
  251. BindFaceDet(m);
  252. BindFaceAlign(m);
  253. BindFaceId(m);
  254. BindMatting(m);
  255. BindOcr(m);
  256. BindTracking(m);
  257. BindKeyPointDetection(m);
  258. BindHeadPose(m);
  259. BindSR(m);
  260. BindGeneration(m);
  261. BindVisualize(m);
  262. BindPerception(m);
  263. }
  264. } // namespace ultra_infer