// ocrmodel_pybind.cc
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include <pybind11/stl.h>
  15. #include "ultra_infer/pybind/main.h"
  16. namespace ultra_infer {
  17. void BindPPOCRModel(pybind11::module &m) {
  18. m.def("sort_boxes", [](std::vector<std::array<int, 8>> &boxes) {
  19. vision::ocr::SortBoxes(&boxes);
  20. return boxes;
  21. });
  22. // UVDoc
  23. pybind11::class_<vision::ocr::UVDocPreprocessor, vision::ProcessorManager>(
  24. m, "UVDocPreprocessor")
  25. .def(pybind11::init<>())
  26. .def("set_normalize",
  27. [](vision::ocr::UVDocPreprocessor &self,
  28. const std::vector<float> &mean, const std::vector<float> &std,
  29. bool is_scale) { self.SetNormalize(mean, std, is_scale); })
  30. .def("run",
  31. [](vision::ocr::UVDocPreprocessor &self,
  32. std::vector<pybind11::array> &im_list) {
  33. std::vector<vision::FDMat> images;
  34. for (size_t i = 0; i < im_list.size(); ++i) {
  35. images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
  36. }
  37. std::vector<FDTensor> outputs;
  38. if (!self.Run(&images, &outputs)) {
  39. throw std::runtime_error(
  40. "Failed to preprocess the input data in "
  41. "UVDocPreprocessor.");
  42. }
  43. for (size_t i = 0; i < outputs.size(); ++i) {
  44. outputs[i].StopSharing();
  45. }
  46. return outputs;
  47. })
  48. .def(
  49. "disable_normalize",
  50. [](vision::ocr::UVDocPreprocessor &self) { self.DisableNormalize(); })
  51. .def("disable_permute",
  52. [](vision::ocr::UVDocPreprocessor &self) { self.DisablePermute(); });
  53. pybind11::class_<vision::ocr::UVDocPostprocessor>(m, "UVDocPostprocessor")
  54. .def(pybind11::init<>())
  55. .def("run", [](vision::ocr::UVDocPostprocessor &self,
  56. std::vector<FDTensor> &inputs) {
  57. std::vector<FDTensor> results;
  58. if (!self.Run(inputs, &results)) {
  59. throw std::runtime_error("Failed to preprocess the input data in "
  60. "UVDocPostprocessor.");
  61. }
  62. for (size_t i = 0; i < results.size(); ++i) {
  63. results[i].StopSharing();
  64. }
  65. return results;
  66. });
  67. pybind11::class_<vision::ocr::UVDocWarpper, UltraInferModel>(m,
  68. "UVDocWarpper")
  69. .def(pybind11::init<std::string, std::string, RuntimeOption,
  70. ModelFormat>())
  71. .def(pybind11::init<>())
  72. .def_property_readonly("preprocessor",
  73. &vision::ocr::UVDocWarpper::GetPreprocessor)
  74. .def_property_readonly("postprocessor",
  75. &vision::ocr::UVDocWarpper::GetPostprocessor)
  76. .def("clone",
  77. [](vision::ocr::UVDocWarpper &self) { return self.Clone(); })
  78. .def("predict",
  79. [](vision::ocr::UVDocWarpper &self, pybind11::array &data) {
  80. auto mat = PyArrayToCvMat(data);
  81. FDTensor res;
  82. self.Predict(mat, &res);
  83. res.StopSharing();
  84. return res;
  85. })
  86. .def("batch_predict", [](vision::ocr::UVDocWarpper &self,
  87. std::vector<pybind11::array> &data) {
  88. std::vector<cv::Mat> images;
  89. for (size_t i = 0; i < data.size(); ++i) {
  90. images.push_back(PyArrayToCvMat(data[i]));
  91. }
  92. std::vector<FDTensor> results;
  93. self.BatchPredict(images, &results);
  94. for (size_t i = 0; i < results.size(); ++i) {
  95. results[i].StopSharing();
  96. }
  97. return results;
  98. // std::vector<cv::Mat> results;
  99. // self.BatchPredict(images, &results);
  100. // std::vector<pybind11::array_t<unsigned char>> ret;
  101. // for(size_t i = 0; i < results.size(); ++i){
  102. // ret.push_back(pybind11::array_t<unsigned char>(
  103. // {results[i].rows, results[i].cols, results[i].channels()},
  104. // results[i].data));
  105. // }
  106. // return ret;
  107. });
  108. // DBDetector
  109. pybind11::class_<vision::ocr::DBDetectorPreprocessor,
  110. vision::ProcessorManager>(m, "DBDetectorPreprocessor")
  111. .def(pybind11::init<>())
  112. .def_property("static_shape_infer",
  113. &vision::ocr::DBDetectorPreprocessor::GetStaticShapeInfer,
  114. &vision::ocr::DBDetectorPreprocessor::SetStaticShapeInfer)
  115. .def_property("max_side_len",
  116. &vision::ocr::DBDetectorPreprocessor::GetMaxSideLen,
  117. &vision::ocr::DBDetectorPreprocessor::SetMaxSideLen)
  118. .def("set_normalize",
  119. [](vision::ocr::DBDetectorPreprocessor &self,
  120. const std::vector<float> &mean, const std::vector<float> &std,
  121. bool is_scale) { self.SetNormalize(mean, std, is_scale); })
  122. .def("run",
  123. [](vision::ocr::DBDetectorPreprocessor &self,
  124. std::vector<pybind11::array> &im_list) {
  125. std::vector<vision::FDMat> images;
  126. for (size_t i = 0; i < im_list.size(); ++i) {
  127. images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
  128. }
  129. std::vector<FDTensor> outputs;
  130. self.Run(&images, &outputs);
  131. auto batch_det_img_info = self.GetBatchImgInfo();
  132. for (size_t i = 0; i < outputs.size(); ++i) {
  133. outputs[i].StopSharing();
  134. }
  135. return std::make_pair(outputs, *batch_det_img_info);
  136. })
  137. .def("disable_normalize",
  138. [](vision::ocr::DBDetectorPreprocessor &self) {
  139. self.DisableNormalize();
  140. })
  141. .def("disable_permute", [](vision::ocr::DBDetectorPreprocessor &self) {
  142. self.DisablePermute();
  143. });
  144. pybind11::class_<vision::ocr::DBDetectorPostprocessor>(
  145. m, "DBDetectorPostprocessor")
  146. .def(pybind11::init<>())
  147. .def_property("det_db_thresh",
  148. &vision::ocr::DBDetectorPostprocessor::GetDetDBThresh,
  149. &vision::ocr::DBDetectorPostprocessor::SetDetDBThresh)
  150. .def_property("det_db_box_thresh",
  151. &vision::ocr::DBDetectorPostprocessor::GetDetDBBoxThresh,
  152. &vision::ocr::DBDetectorPostprocessor::SetDetDBBoxThresh)
  153. .def_property("det_db_unclip_ratio",
  154. &vision::ocr::DBDetectorPostprocessor::GetDetDBUnclipRatio,
  155. &vision::ocr::DBDetectorPostprocessor::SetDetDBUnclipRatio)
  156. .def_property("det_db_score_mode",
  157. &vision::ocr::DBDetectorPostprocessor::GetDetDBScoreMode,
  158. &vision::ocr::DBDetectorPostprocessor::SetDetDBScoreMode)
  159. .def_property("use_dilation",
  160. &vision::ocr::DBDetectorPostprocessor::GetUseDilation,
  161. &vision::ocr::DBDetectorPostprocessor::SetUseDilation)
  162. .def("run",
  163. [](vision::ocr::DBDetectorPostprocessor &self,
  164. std::vector<FDTensor> &inputs,
  165. const std::vector<std::array<int, 4>> &batch_det_img_info) {
  166. std::vector<std::vector<std::array<int, 8>>> results;
  167. if (!self.Run(inputs, &results, batch_det_img_info)) {
  168. throw std::runtime_error(
  169. "Failed to preprocess the input data in "
  170. "DBDetectorPostprocessor.");
  171. }
  172. return results;
  173. })
  174. .def(
  175. "run", [](vision::ocr::DBDetectorPostprocessor &self,
  176. std::vector<pybind11::array> &input_array,
  177. const std::vector<std::array<int, 4>> &batch_det_img_info) {
  178. std::vector<std::vector<std::array<int, 8>>> results;
  179. std::vector<FDTensor> inputs;
  180. PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
  181. if (!self.Run(inputs, &results, batch_det_img_info)) {
  182. throw std::runtime_error("Failed to preprocess the input data in "
  183. "DBDetectorPostprocessor.");
  184. }
  185. return results;
  186. });
  187. pybind11::class_<vision::ocr::DBCURVEDetectorPostprocessor>(
  188. m, "DBCURVEDetectorPostprocessor")
  189. .def(pybind11::init<>())
  190. .def_property("det_db_thresh",
  191. &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBThresh,
  192. &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBThresh)
  193. .def_property(
  194. "det_db_box_thresh",
  195. &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBBoxThresh,
  196. &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBBoxThresh)
  197. .def_property(
  198. "det_db_unclip_ratio",
  199. &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBUnclipRatio,
  200. &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBUnclipRatio)
  201. .def_property(
  202. "det_db_score_mode",
  203. &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBScoreMode,
  204. &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBScoreMode)
  205. .def_property("det_db_box_type",
  206. &vision::ocr::DBCURVEDetectorPostprocessor::GetDetDBBoxType,
  207. &vision::ocr::DBCURVEDetectorPostprocessor::SetDetDBBoxType)
  208. .def_property("use_dilation",
  209. &vision::ocr::DBCURVEDetectorPostprocessor::GetUseDilation,
  210. &vision::ocr::DBCURVEDetectorPostprocessor::SetUseDilation)
  211. .def("run",
  212. [](vision::ocr::DBCURVEDetectorPostprocessor &self,
  213. std::vector<FDTensor> &inputs,
  214. const std::vector<std::array<int, 4>> &batch_det_img_info) {
  215. std::vector<std::vector<std::vector<int>>> results;
  216. if (!self.Run(inputs, &results, batch_det_img_info)) {
  217. throw std::runtime_error(
  218. "Failed to preprocess the input data in "
  219. "DBCURVEDetectorPostprocessor.");
  220. }
  221. return results;
  222. })
  223. .def(
  224. "run", [](vision::ocr::DBCURVEDetectorPostprocessor &self,
  225. std::vector<pybind11::array> &input_array,
  226. const std::vector<std::array<int, 4>> &batch_det_img_info) {
  227. std::vector<std::vector<std::vector<int>>> results;
  228. std::vector<FDTensor> inputs;
  229. PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
  230. if (!self.Run(inputs, &results, batch_det_img_info)) {
  231. throw std::runtime_error("Failed to preprocess the input data in "
  232. "DBCURVEDetectorPostprocessor.");
  233. }
  234. return results;
  235. });
  236. pybind11::class_<vision::ocr::DBDetector, UltraInferModel>(m, "DBDetector")
  237. .def(pybind11::init<std::string, std::string, RuntimeOption,
  238. ModelFormat>())
  239. .def(pybind11::init<>())
  240. .def_property_readonly("preprocessor",
  241. &vision::ocr::DBDetector::GetPreprocessor)
  242. .def_property_readonly("postprocessor",
  243. &vision::ocr::DBDetector::GetPostprocessor)
  244. .def("predict",
  245. [](vision::ocr::DBDetector &self, pybind11::array &data) {
  246. auto mat = PyArrayToCvMat(data);
  247. vision::OCRResult ocr_result;
  248. self.Predict(mat, &ocr_result);
  249. return ocr_result;
  250. })
  251. .def("batch_predict", [](vision::ocr::DBDetector &self,
  252. std::vector<pybind11::array> &data) {
  253. std::vector<cv::Mat> images;
  254. for (size_t i = 0; i < data.size(); ++i) {
  255. images.push_back(PyArrayToCvMat(data[i]));
  256. }
  257. std::vector<vision::OCRResult> ocr_results;
  258. self.BatchPredict(images, &ocr_results);
  259. return ocr_results;
  260. });
  261. pybind11::class_<vision::ocr::DBCURVEDetector, UltraInferModel>(
  262. m, "DBCURVEDetector")
  263. .def(pybind11::init<std::string, std::string, RuntimeOption,
  264. ModelFormat>())
  265. .def(pybind11::init<>())
  266. .def_property_readonly("preprocessor",
  267. &vision::ocr::DBCURVEDetector::GetPreprocessor)
  268. .def_property_readonly("postprocessor",
  269. &vision::ocr::DBCURVEDetector::GetPostprocessor)
  270. .def("predict",
  271. [](vision::ocr::DBCURVEDetector &self, pybind11::array &data) {
  272. auto mat = PyArrayToCvMat(data);
  273. vision::OCRCURVEResult ocr_result;
  274. self.Predict(mat, &ocr_result);
  275. return ocr_result;
  276. })
  277. .def("batch_predict", [](vision::ocr::DBCURVEDetector &self,
  278. std::vector<pybind11::array> &data) {
  279. std::vector<cv::Mat> images;
  280. for (size_t i = 0; i < data.size(); ++i) {
  281. images.push_back(PyArrayToCvMat(data[i]));
  282. }
  283. std::vector<vision::OCRCURVEResult> ocr_results;
  284. self.BatchPredict(images, &ocr_results);
  285. return ocr_results;
  286. });
  287. // Classifier
  288. pybind11::class_<vision::ocr::ClassifierPreprocessor,
  289. vision::ProcessorManager>(m, "ClassifierPreprocessor")
  290. .def(pybind11::init<>())
  291. .def_property("cls_image_shape",
  292. &vision::ocr::ClassifierPreprocessor::GetClsImageShape,
  293. &vision::ocr::ClassifierPreprocessor::SetClsImageShape)
  294. .def("set_normalize",
  295. [](vision::ocr::ClassifierPreprocessor &self,
  296. const std::vector<float> &mean, const std::vector<float> &std,
  297. bool is_scale) { self.SetNormalize(mean, std, is_scale); })
  298. .def("run",
  299. [](vision::ocr::ClassifierPreprocessor &self,
  300. std::vector<pybind11::array> &im_list) {
  301. std::vector<vision::FDMat> images;
  302. for (size_t i = 0; i < im_list.size(); ++i) {
  303. images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
  304. }
  305. std::vector<FDTensor> outputs;
  306. if (!self.Run(&images, &outputs)) {
  307. throw std::runtime_error(
  308. "Failed to preprocess the input data in "
  309. "ClassifierPreprocessor.");
  310. }
  311. for (size_t i = 0; i < outputs.size(); ++i) {
  312. outputs[i].StopSharing();
  313. }
  314. return outputs;
  315. })
  316. .def("disable_normalize",
  317. [](vision::ocr::ClassifierPreprocessor &self) {
  318. self.DisableNormalize();
  319. })
  320. .def("disable_permute", [](vision::ocr::ClassifierPreprocessor &self) {
  321. self.DisablePermute();
  322. });
  323. pybind11::class_<vision::ocr::ClassifierPostprocessor>(
  324. m, "ClassifierPostprocessor")
  325. .def(pybind11::init<>())
  326. .def_property("cls_thresh",
  327. &vision::ocr::ClassifierPostprocessor::GetClsThresh,
  328. &vision::ocr::ClassifierPostprocessor::SetClsThresh)
  329. .def("run",
  330. [](vision::ocr::ClassifierPostprocessor &self,
  331. std::vector<FDTensor> &inputs) {
  332. std::vector<int> cls_labels;
  333. std::vector<float> cls_scores;
  334. if (!self.Run(inputs, &cls_labels, &cls_scores)) {
  335. throw std::runtime_error(
  336. "Failed to preprocess the input data in "
  337. "ClassifierPostprocessor.");
  338. }
  339. return std::make_pair(cls_labels, cls_scores);
  340. })
  341. .def("run", [](vision::ocr::ClassifierPostprocessor &self,
  342. std::vector<pybind11::array> &input_array) {
  343. std::vector<FDTensor> inputs;
  344. PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
  345. std::vector<int> cls_labels;
  346. std::vector<float> cls_scores;
  347. if (!self.Run(inputs, &cls_labels, &cls_scores)) {
  348. throw std::runtime_error("Failed to preprocess the input data in "
  349. "ClassifierPostprocessor.");
  350. }
  351. return std::make_pair(cls_labels, cls_scores);
  352. });
  353. pybind11::class_<vision::ocr::Classifier, UltraInferModel>(m, "Classifier")
  354. .def(pybind11::init<std::string, std::string, RuntimeOption,
  355. ModelFormat>())
  356. .def(pybind11::init<>())
  357. .def_property_readonly("preprocessor",
  358. &vision::ocr::Classifier::GetPreprocessor)
  359. .def_property_readonly("postprocessor",
  360. &vision::ocr::Classifier::GetPostprocessor)
  361. .def("predict",
  362. [](vision::ocr::Classifier &self, pybind11::array &data) {
  363. auto mat = PyArrayToCvMat(data);
  364. vision::OCRResult ocr_result;
  365. self.Predict(mat, &ocr_result);
  366. return ocr_result;
  367. })
  368. .def("batch_predict", [](vision::ocr::Classifier &self,
  369. std::vector<pybind11::array> &data) {
  370. std::vector<cv::Mat> images;
  371. for (size_t i = 0; i < data.size(); ++i) {
  372. images.push_back(PyArrayToCvMat(data[i]));
  373. }
  374. vision::OCRResult ocr_result;
  375. self.BatchPredict(images, &ocr_result);
  376. return ocr_result;
  377. });
  378. // Recognizer
  379. pybind11::class_<vision::ocr::RecognizerPreprocessor,
  380. vision::ProcessorManager>(m, "RecognizerPreprocessor")
  381. .def(pybind11::init<>())
  382. .def_property("static_shape_infer",
  383. &vision::ocr::RecognizerPreprocessor::GetStaticShapeInfer,
  384. &vision::ocr::RecognizerPreprocessor::SetStaticShapeInfer)
  385. .def_property("rec_image_shape",
  386. &vision::ocr::RecognizerPreprocessor::GetRecImageShape,
  387. &vision::ocr::RecognizerPreprocessor::SetRecImageShape)
  388. .def("set_normalize",
  389. [](vision::ocr::RecognizerPreprocessor &self,
  390. const std::vector<float> &mean, const std::vector<float> &std,
  391. bool is_scale) { self.SetNormalize(mean, std, is_scale); })
  392. .def("run",
  393. [](vision::ocr::RecognizerPreprocessor &self,
  394. std::vector<pybind11::array> &im_list) {
  395. std::vector<vision::FDMat> images;
  396. for (size_t i = 0; i < im_list.size(); ++i) {
  397. images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
  398. }
  399. std::vector<FDTensor> outputs;
  400. if (!self.Run(&images, &outputs)) {
  401. throw std::runtime_error(
  402. "Failed to preprocess the input data in "
  403. "RecognizerPreprocessor.");
  404. }
  405. for (size_t i = 0; i < outputs.size(); ++i) {
  406. outputs[i].StopSharing();
  407. }
  408. return outputs;
  409. })
  410. .def("disable_normalize",
  411. [](vision::ocr::RecognizerPreprocessor &self) {
  412. self.DisableNormalize();
  413. })
  414. .def("disable_permute", [](vision::ocr::RecognizerPreprocessor &self) {
  415. self.DisablePermute();
  416. });
  417. pybind11::class_<vision::ocr::RecognizerPostprocessor>(
  418. m, "RecognizerPostprocessor")
  419. .def(pybind11::init<std::string>())
  420. .def("run",
  421. [](vision::ocr::RecognizerPostprocessor &self,
  422. std::vector<FDTensor> &inputs) {
  423. std::vector<std::string> texts;
  424. std::vector<float> rec_scores;
  425. if (!self.Run(inputs, &texts, &rec_scores)) {
  426. throw std::runtime_error(
  427. "Failed to preprocess the input data in "
  428. "RecognizerPostprocessor.");
  429. }
  430. return std::make_pair(texts, rec_scores);
  431. })
  432. .def("run", [](vision::ocr::RecognizerPostprocessor &self,
  433. std::vector<pybind11::array> &input_array) {
  434. std::vector<FDTensor> inputs;
  435. PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
  436. std::vector<std::string> texts;
  437. std::vector<float> rec_scores;
  438. if (!self.Run(inputs, &texts, &rec_scores)) {
  439. throw std::runtime_error("Failed to preprocess the input data in "
  440. "RecognizerPostprocessor.");
  441. }
  442. return std::make_pair(texts, rec_scores);
  443. });
  444. pybind11::class_<vision::ocr::Recognizer, UltraInferModel>(m, "Recognizer")
  445. .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
  446. ModelFormat>())
  447. .def(pybind11::init<>())
  448. .def_property_readonly("preprocessor",
  449. &vision::ocr::Recognizer::GetPreprocessor)
  450. .def_property_readonly("postprocessor",
  451. &vision::ocr::Recognizer::GetPostprocessor)
  452. .def("clone", [](vision::ocr::Recognizer &self) { return self.Clone(); })
  453. .def("predict",
  454. [](vision::ocr::Recognizer &self, pybind11::array &data) {
  455. auto mat = PyArrayToCvMat(data);
  456. vision::OCRResult ocr_result;
  457. self.Predict(mat, &ocr_result);
  458. return ocr_result;
  459. })
  460. .def("batch_predict", [](vision::ocr::Recognizer &self,
  461. std::vector<pybind11::array> &data) {
  462. std::vector<cv::Mat> images;
  463. for (size_t i = 0; i < data.size(); ++i) {
  464. images.push_back(PyArrayToCvMat(data[i]));
  465. }
  466. vision::OCRResult ocr_result;
  467. self.BatchPredict(images, &ocr_result);
  468. return ocr_result;
  469. });
  470. // Table
  471. pybind11::class_<vision::ocr::StructureV2TablePreprocessor,
  472. vision::ProcessorManager>(m, "StructureV2TablePreprocessor")
  473. .def(pybind11::init<>())
  474. .def("run", [](vision::ocr::StructureV2TablePreprocessor &self,
  475. std::vector<pybind11::array> &im_list) {
  476. std::vector<vision::FDMat> images;
  477. for (size_t i = 0; i < im_list.size(); ++i) {
  478. images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
  479. }
  480. std::vector<FDTensor> outputs;
  481. if (!self.Run(&images, &outputs)) {
  482. throw std::runtime_error("Failed to preprocess the input data in "
  483. "StructureV2TablePreprocessor.");
  484. }
  485. auto batch_det_img_info = self.GetBatchImgInfo();
  486. for (size_t i = 0; i < outputs.size(); ++i) {
  487. outputs[i].StopSharing();
  488. }
  489. return std::make_pair(outputs, *batch_det_img_info);
  490. });
  491. pybind11::class_<vision::ocr::StructureV2TablePostprocessor>(
  492. m, "StructureV2TablePostprocessor")
  493. .def(pybind11::init<std::string, std::string>())
  494. .def("run",
  495. [](vision::ocr::StructureV2TablePostprocessor &self,
  496. std::vector<FDTensor> &inputs,
  497. const std::vector<std::array<float, 6>> &batch_det_img_info) {
  498. std::vector<std::vector<std::array<int, 8>>> boxes;
  499. std::vector<std::vector<std::string>> structure_list;
  500. if (!self.Run(inputs, &boxes, &structure_list,
  501. batch_det_img_info)) {
  502. throw std::runtime_error(
  503. "Failed to postprocess the input data in "
  504. "StructureV2TablePostprocessor.");
  505. }
  506. return std::make_pair(boxes, structure_list);
  507. })
  508. .def("run",
  509. [](vision::ocr::StructureV2TablePostprocessor &self,
  510. std::vector<pybind11::array> &input_array,
  511. const std::vector<std::array<float, 6>> &batch_det_img_info) {
  512. std::vector<FDTensor> inputs;
  513. PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
  514. std::vector<std::vector<std::array<int, 8>>> boxes;
  515. std::vector<std::vector<std::string>> structure_list;
  516. if (!self.Run(inputs, &boxes, &structure_list,
  517. batch_det_img_info)) {
  518. throw std::runtime_error(
  519. "Failed to postprocess the input data in "
  520. "StructureV2TablePostprocessor.");
  521. }
  522. return std::make_pair(boxes, structure_list);
  523. });
  524. pybind11::class_<vision::ocr::StructureV2Table, UltraInferModel>(
  525. m, "StructureV2Table")
  526. .def(pybind11::init<std::string, std::string, std::string, std::string,
  527. RuntimeOption, ModelFormat>())
  528. .def(pybind11::init<>())
  529. .def_property_readonly("preprocessor",
  530. &vision::ocr::StructureV2Table::GetPreprocessor)
  531. .def_property_readonly("postprocessor",
  532. &vision::ocr::StructureV2Table::GetPostprocessor)
  533. .def("clone",
  534. [](vision::ocr::StructureV2Table &self) { return self.Clone(); })
  535. .def("predict",
  536. [](vision::ocr::StructureV2Table &self, pybind11::array &data) {
  537. auto mat = PyArrayToCvMat(data);
  538. vision::OCRResult ocr_result;
  539. self.Predict(mat, &ocr_result);
  540. return ocr_result;
  541. })
  542. .def("batch_predict", [](vision::ocr::StructureV2Table &self,
  543. std::vector<pybind11::array> &data) {
  544. std::vector<cv::Mat> images;
  545. for (size_t i = 0; i < data.size(); ++i) {
  546. images.push_back(PyArrayToCvMat(data[i]));
  547. }
  548. std::vector<vision::OCRResult> ocr_results;
  549. self.BatchPredict(images, &ocr_results);
  550. return ocr_results;
  551. });
  552. // Layout
  553. pybind11::class_<vision::ocr::StructureV2LayoutPreprocessor,
  554. vision::ProcessorManager>(m, "StructureV2LayoutPreprocessor")
  555. .def(pybind11::init<>())
  556. .def_property(
  557. "static_shape_infer",
  558. &vision::ocr::StructureV2LayoutPreprocessor::GetStaticShapeInfer,
  559. &vision::ocr::StructureV2LayoutPreprocessor::SetStaticShapeInfer)
  560. .def_property(
  561. "layout_image_shape",
  562. &vision::ocr::StructureV2LayoutPreprocessor::GetLayoutImageShape,
  563. &vision::ocr::StructureV2LayoutPreprocessor::SetLayoutImageShape)
  564. .def("set_normalize",
  565. [](vision::ocr::StructureV2LayoutPreprocessor &self,
  566. const std::vector<float> &mean, const std::vector<float> &std,
  567. bool is_scale) { self.SetNormalize(mean, std, is_scale); })
  568. .def("run",
  569. [](vision::ocr::StructureV2LayoutPreprocessor &self,
  570. std::vector<pybind11::array> &im_list) {
  571. std::vector<vision::FDMat> images;
  572. for (size_t i = 0; i < im_list.size(); ++i) {
  573. images.push_back(vision::WrapMat(PyArrayToCvMat(im_list[i])));
  574. }
  575. std::vector<FDTensor> outputs;
  576. if (!self.Run(&images, &outputs)) {
  577. throw std::runtime_error(
  578. "Failed to preprocess the input data in "
  579. "StructureV2LayoutPreprocessor.");
  580. }
  581. auto batch_layout_img_info = self.GetBatchLayoutImgInfo();
  582. for (size_t i = 0; i < outputs.size(); ++i) {
  583. outputs[i].StopSharing();
  584. }
  585. return std::make_pair(outputs, *batch_layout_img_info);
  586. })
  587. .def("disable_normalize",
  588. [](vision::ocr::StructureV2LayoutPreprocessor &self) {
  589. self.DisableNormalize();
  590. })
  591. .def("disable_permute",
  592. [](vision::ocr::StructureV2LayoutPreprocessor &self) {
  593. self.DisablePermute();
  594. });
  595. pybind11::class_<vision::ocr::StructureV2LayoutPostprocessor>(
  596. m, "StructureV2LayoutPostprocessor")
  597. .def(pybind11::init<>())
  598. .def_property(
  599. "score_threshold",
  600. &vision::ocr::StructureV2LayoutPostprocessor::GetScoreThreshold,
  601. &vision::ocr::StructureV2LayoutPostprocessor::SetScoreThreshold)
  602. .def_property(
  603. "nms_threshold",
  604. &vision::ocr::StructureV2LayoutPostprocessor::GetNMSThreshold,
  605. &vision::ocr::StructureV2LayoutPostprocessor::SetNMSThreshold)
  606. .def_property("num_class",
  607. &vision::ocr::StructureV2LayoutPostprocessor::GetNumClass,
  608. &vision::ocr::StructureV2LayoutPostprocessor::SetNumClass)
  609. .def_property("fpn_stride",
  610. &vision::ocr::StructureV2LayoutPostprocessor::GetFPNStride,
  611. &vision::ocr::StructureV2LayoutPostprocessor::SetFPNStride)
  612. .def_property("reg_max",
  613. &vision::ocr::StructureV2LayoutPostprocessor::GetRegMax,
  614. &vision::ocr::StructureV2LayoutPostprocessor::SetRegMax)
  615. .def("run",
  616. [](vision::ocr::StructureV2LayoutPostprocessor &self,
  617. std::vector<FDTensor> &inputs,
  618. const std::vector<std::array<int, 4>> &batch_layout_img_info) {
  619. std::vector<vision::DetectionResult> results;
  620. if (!self.Run(inputs, &results, batch_layout_img_info)) {
  621. throw std::runtime_error(
  622. "Failed to postprocess the input data in "
  623. "StructureV2LayoutPostprocessor.");
  624. }
  625. return results;
  626. });
  627. pybind11::class_<vision::ocr::StructureV2Layout, UltraInferModel>(
  628. m, "StructureV2Layout")
  629. .def(pybind11::init<std::string, std::string, RuntimeOption,
  630. ModelFormat>())
  631. .def(pybind11::init<>())
  632. .def_property_readonly("preprocessor",
  633. &vision::ocr::StructureV2Layout::GetPreprocessor)
  634. .def_property_readonly("postprocessor",
  635. &vision::ocr::StructureV2Layout::GetPostprocessor)
  636. .def("clone",
  637. [](vision::ocr::StructureV2Layout &self) { return self.Clone(); })
  638. .def("predict",
  639. [](vision::ocr::StructureV2Layout &self, pybind11::array &data) {
  640. auto mat = PyArrayToCvMat(data);
  641. vision::DetectionResult result;
  642. self.Predict(mat, &result);
  643. return result;
  644. })
  645. .def("batch_predict", [](vision::ocr::StructureV2Layout &self,
  646. std::vector<pybind11::array> &data) {
  647. std::vector<cv::Mat> images;
  648. for (size_t i = 0; i < data.size(); ++i) {
  649. images.push_back(PyArrayToCvMat(data[i]));
  650. }
  651. std::vector<vision::DetectionResult> results;
  652. self.BatchPredict(images, &results);
  653. return results;
  654. });
  655. pybind11::class_<vision::ocr::StructureV2SERViLayoutXLMModel,
  656. UltraInferModel>(m, "StructureV2SERViLayoutXLMModel")
  657. .def(pybind11::init<std::string, std::string, std::string, RuntimeOption,
  658. ModelFormat>())
  659. .def("clone",
  660. [](vision::ocr::StructureV2SERViLayoutXLMModel &self) {
  661. return self.Clone();
  662. })
  663. .def("predict",
  664. [](vision::ocr::StructureV2SERViLayoutXLMModel &self,
  665. pybind11::array &data) {
  666. throw std::runtime_error(
  667. "StructureV2SERViLayoutXLMModel do not support predict.");
  668. })
  669. .def(
  670. "batch_predict",
  671. [](vision::ocr::StructureV2SERViLayoutXLMModel &self,
  672. std::vector<pybind11::array> &data) {
  673. throw std::runtime_error(
  674. "StructureV2SERViLayoutXLMModel do not support batch_predict.");
  675. })
  676. .def("infer",
  677. [](vision::ocr::StructureV2SERViLayoutXLMModel &self,
  678. std::map<std::string, pybind11::array> &data) {
  679. std::vector<FDTensor> inputs(data.size());
  680. int index = 0;
  681. for (auto iter = data.begin(); iter != data.end(); ++iter) {
  682. std::vector<int64_t> data_shape;
  683. data_shape.insert(data_shape.begin(), iter->second.shape(),
  684. iter->second.shape() + iter->second.ndim());
  685. auto dtype = NumpyDataTypeToFDDataType(iter->second.dtype());
  686. inputs[index].Resize(data_shape, dtype);
  687. memcpy(inputs[index].MutableData(), iter->second.mutable_data(),
  688. iter->second.nbytes());
  689. inputs[index].name = iter->first;
  690. index += 1;
  691. }
  692. std::vector<FDTensor> outputs(self.NumOutputsOfRuntime());
  693. self.Infer(inputs, &outputs);
  694. std::vector<pybind11::array> results;
  695. results.reserve(outputs.size());
  696. for (size_t i = 0; i < outputs.size(); ++i) {
  697. auto numpy_dtype = FDDataTypeToNumpyDataType(outputs[i].dtype);
  698. results.emplace_back(
  699. pybind11::array(numpy_dtype, outputs[i].shape));
  700. memcpy(results[i].mutable_data(), outputs[i].Data(),
  701. outputs[i].Numel() * FDDataTypeSize(outputs[i].dtype));
  702. }
  703. return results;
  704. })
  705. .def("get_input_info",
  706. [](vision::ocr::StructureV2SERViLayoutXLMModel &self, int &index) {
  707. return self.InputInfoOfRuntime(index);
  708. });
  709. }
  710. } // namespace ultra_infer