pipeline_arguments.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from ast import literal_eval
  15. from typing import Dict, List, Literal, Optional, Tuple, Union
  16. from pydantic import TypeAdapter, ValidationError
  17. def custom_type(cli_expected_type):
  18. """Create validator for CLI input conversion and type checking"""
  19. def validator(cli_input: str) -> cli_expected_type:
  20. try:
  21. parsed = literal_eval(cli_input)
  22. except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
  23. err = f"""Malformed input:
  24. - Input: {cli_input!r}
  25. - Error: {exc}"""
  26. raise ValueError(err) from exc
  27. try:
  28. return TypeAdapter(cli_expected_type).validate_python(parsed)
  29. except ValidationError as exc:
  30. err = f"""Invalid input type:
  31. - Expected: {cli_expected_type}
  32. - Received: {cli_input!r}
  33. """
  34. raise ValueError(err) from exc
  35. return validator
  36. PIPELINE_ARGUMENTS = {
  37. "OCR": [
  38. {
  39. "name": "--use_doc_orientation_classify",
  40. "type": bool,
  41. "help": "Determines whether to use document orientation classification",
  42. },
  43. {
  44. "name": "--use_doc_unwarping",
  45. "type": bool,
  46. "help": "Determines whether to use document unwarping",
  47. },
  48. {
  49. "name": "--use_textline_orientation",
  50. "type": bool,
  51. "help": "Determines whether to consider text line orientation",
  52. },
  53. {
  54. "name": "--text_det_limit_side_len",
  55. "type": int,
  56. "help": "Sets the side length limit for text detection.",
  57. },
  58. {
  59. "name": "--text_det_limit_type",
  60. "type": str,
  61. "help": "Sets the limit type for text detection.",
  62. },
  63. {
  64. "name": "--text_det_thresh",
  65. "type": float,
  66. "help": "Sets the threshold for text detection.",
  67. },
  68. {
  69. "name": "--text_det_box_thresh",
  70. "type": float,
  71. "help": "Sets the box threshold for text detection.",
  72. },
  73. {
  74. "name": "--text_det_unclip_ratio",
  75. "type": float,
  76. "help": "Sets the unclip ratio for text detection.",
  77. },
  78. {
  79. "name": "--text_rec_score_thresh",
  80. "type": float,
  81. "help": "Sets the score threshold for text recognition.",
  82. },
  83. ],
  84. "object_detection": [
  85. {
  86. "name": "--threshold",
  87. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  88. "help": "Sets the threshold for object detection.",
  89. },
  90. ],
  91. "image_classification": [
  92. {
  93. "name": "--topk",
  94. "type": int,
  95. "help": "Sets the Top-K value for image classification.",
  96. },
  97. ],
  98. "image_multilabel_classification": [
  99. {
  100. "name": "--threshold",
  101. "type": float,
  102. "help": "Sets the threshold for image multilabel classification.",
  103. },
  104. ],
  105. "pedestrian_attribute_recognition": [
  106. {
  107. "name": "--det_threshold",
  108. "type": float,
  109. "help": "Sets the threshold for human detection.",
  110. },
  111. {
  112. "name": "--cls_threshold",
  113. "type": float,
  114. "help": "Sets the threshold for pedestrian attribute recognition.",
  115. },
  116. ],
  117. "vehicle_attribute_recognition": [
  118. {
  119. "name": "--det_threshold",
  120. "type": float,
  121. "help": "Sets the threshold for vehicle detection.",
  122. },
  123. {
  124. "name": "--cls_threshold",
  125. "type": float,
  126. "help": "Sets the threshold for vehicle attribute recognition.",
  127. },
  128. ],
  129. "human_keypoint_detection": [
  130. {
  131. "name": "--det_threshold",
  132. "type": custom_type(Optional[float]),
  133. "help": "Sets the threshold for human detection.",
  134. },
  135. ],
  136. "table_recognition": [
  137. {
  138. "name": "--use_table_cells_ocr_results",
  139. "type": bool,
  140. "help": "Determines whether to use cells OCR results",
  141. },
  142. {
  143. "name": "--use_doc_orientation_classify",
  144. "type": bool,
  145. "help": "Determines whether to use document preprocessing",
  146. },
  147. {
  148. "name": "--use_doc_unwarping",
  149. "type": bool,
  150. "help": "Determines whether to use document unwarping",
  151. },
  152. {
  153. "name": "--use_layout_detection",
  154. "type": bool,
  155. "help": "Determines whether to use document layout detection",
  156. },
  157. {
  158. "name": "--use_ocr_model",
  159. "type": bool,
  160. "help": "Determines whether to use OCR",
  161. },
  162. {
  163. "name": "--text_det_limit_side_len",
  164. "type": int,
  165. "help": "Sets the side length limit for text detection.",
  166. },
  167. {
  168. "name": "--text_det_limit_type",
  169. "type": str,
  170. "help": "Sets the limit type for text detection.",
  171. },
  172. {
  173. "name": "--text_det_thresh",
  174. "type": float,
  175. "help": "Sets the threshold for text detection.",
  176. },
  177. {
  178. "name": "--text_det_box_thresh",
  179. "type": float,
  180. "help": "Sets the box threshold for text detection.",
  181. },
  182. {
  183. "name": "--text_det_unclip_ratio",
  184. "type": float,
  185. "help": "Sets the unclip ratio for text detection.",
  186. },
  187. {
  188. "name": "--text_rec_score_thresh",
  189. "type": float,
  190. "help": "Sets the score threshold for text recognition.",
  191. },
  192. ],
  193. "table_recognition_v2": [
  194. {
  195. "name": "--use_table_cells_ocr_results",
  196. "type": bool,
  197. "help": "Determines whether to use cells OCR results",
  198. },
  199. {
  200. "name": "--use_e2e_wired_table_rec_model",
  201. "type": bool,
  202. "help": "Determines whether to use end-to-end wired table recognition model",
  203. },
  204. {
  205. "name": "--use_e2e_wireless_table_rec_model",
  206. "type": bool,
  207. "help": "Determines whether to use end-to-end wireless table recognition model",
  208. },
  209. {
  210. "name": "--use_doc_orientation_classify",
  211. "type": bool,
  212. "help": "Determines whether to use document preprocessing",
  213. },
  214. {
  215. "name": "--use_doc_unwarping",
  216. "type": bool,
  217. "help": "Determines whether to use document unwarping",
  218. },
  219. {
  220. "name": "--use_layout_detection",
  221. "type": bool,
  222. "help": "Determines whether to use document layout detection",
  223. },
  224. {
  225. "name": "--use_ocr_model",
  226. "type": bool,
  227. "help": "Determines whether to use OCR",
  228. },
  229. {
  230. "name": "--text_det_limit_side_len",
  231. "type": int,
  232. "help": "Sets the side length limit for text detection.",
  233. },
  234. {
  235. "name": "--text_det_limit_type",
  236. "type": str,
  237. "help": "Sets the limit type for text detection.",
  238. },
  239. {
  240. "name": "--text_det_thresh",
  241. "type": float,
  242. "help": "Sets the threshold for text detection.",
  243. },
  244. {
  245. "name": "--text_det_box_thresh",
  246. "type": float,
  247. "help": "Sets the box threshold for text detection.",
  248. },
  249. {
  250. "name": "--text_det_unclip_ratio",
  251. "type": float,
  252. "help": "Sets the unclip ratio for text detection.",
  253. },
  254. {
  255. "name": "--text_rec_score_thresh",
  256. "type": float,
  257. "help": "Sets the score threshold for text recognition.",
  258. },
  259. ],
  260. "seal_recognition": [
  261. {
  262. "name": "--use_doc_orientation_classify",
  263. "type": bool,
  264. "help": "Determines whether to use document preprocessing",
  265. },
  266. {
  267. "name": "--use_doc_unwarping",
  268. "type": bool,
  269. "help": "Determines whether to use document unwarping",
  270. },
  271. {
  272. "name": "--use_layout_detection",
  273. "type": bool,
  274. "help": "Determines whether to use document layout detection",
  275. },
  276. {
  277. "name": "--layout_threshold",
  278. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  279. "help": "Determines confidence threshold for layout detection",
  280. },
  281. {
  282. "name": "--layout_nms",
  283. "type": bool,
  284. "help": "Determines whether to use non maximum suppression",
  285. },
  286. {
  287. "name": "--layout_unclip_ratio",
  288. "type": custom_type(
  289. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  290. ),
  291. "help": "Determines unclip ratio for layout detection boxes",
  292. },
  293. {
  294. "name": "--layout_merge_bboxes_mode",
  295. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  296. "help": "Determines merge mode for layout detection bboxes, 'union', 'large' or 'small'",
  297. },
  298. {
  299. "name": "--seal_det_limit_side_len",
  300. "type": int,
  301. "help": "Sets the side length limit for text detection.",
  302. },
  303. {
  304. "name": "--seal_det_limit_type",
  305. "type": str,
  306. "help": "Sets the limit type for text detection, 'min', 'max'.",
  307. },
  308. {
  309. "name": "--seal_det_thresh",
  310. "type": float,
  311. "help": "Sets the threshold for text detection.",
  312. },
  313. {
  314. "name": "--seal_det_box_thresh",
  315. "type": float,
  316. "help": "Sets the box threshold for text detection.",
  317. },
  318. {
  319. "name": "--seal_det_unclip_ratio",
  320. "type": float,
  321. "help": "Sets the unclip ratio for text detection.",
  322. },
  323. {
  324. "name": "--seal_rec_score_thresh",
  325. "type": float,
  326. "help": "Sets the score threshold for text recognition.",
  327. },
  328. ],
  329. "layout_parsing": [
  330. {
  331. "name": "--use_doc_orientation_classify",
  332. "type": bool,
  333. "help": "Determines whether to use document orientation classification",
  334. },
  335. {
  336. "name": "--use_doc_unwarping",
  337. "type": bool,
  338. "help": "Determines whether to use document unwarping",
  339. },
  340. {
  341. "name": "--use_general_ocr",
  342. "type": bool,
  343. "help": "Determines whether to use general ocr",
  344. },
  345. {
  346. "name": "--use_textline_orientation",
  347. "type": bool,
  348. "help": "Determines whether to consider text line orientation",
  349. },
  350. {
  351. "name": "--use_seal_recognition",
  352. "type": bool,
  353. "help": "Determines whether to use seal recognition",
  354. },
  355. {
  356. "name": "--use_table_recognition",
  357. "type": bool,
  358. "help": "Determines whether to use table recognition",
  359. },
  360. {
  361. "name": "--use_formula_recognition",
  362. "type": bool,
  363. "help": "Determines whether to use formula recognition",
  364. },
  365. {
  366. "name": "--layout_threshold",
  367. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  368. "help": "Determines confidence threshold for layout detection",
  369. },
  370. {
  371. "name": "--layout_nms",
  372. "type": bool,
  373. "help": "Determines whether to use non maximum suppression",
  374. },
  375. {
  376. "name": "--layout_unclip_ratio",
  377. "type": custom_type(
  378. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  379. ),
  380. "help": "Determines unclip ratio for layout detection boxes",
  381. },
  382. {
  383. "name": "--layout_merge_bboxes_mode",
  384. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  385. "help": "Determines merge mode for layout detection bboxes, 'union', 'large' or 'small'",
  386. },
  387. {
  388. "name": "--seal_det_limit_side_len",
  389. "type": int,
  390. "help": "Sets the side length limit for text detection.",
  391. },
  392. {
  393. "name": "--seal_det_limit_type",
  394. "type": str,
  395. "help": "Sets the limit type for text detection, 'min', 'max'.",
  396. },
  397. {
  398. "name": "--seal_det_thresh",
  399. "type": float,
  400. "help": "Sets the threshold for text detection.",
  401. },
  402. {
  403. "name": "--seal_det_box_thresh",
  404. "type": float,
  405. "help": "Sets the box threshold for text detection.",
  406. },
  407. {
  408. "name": "--seal_det_unclip_ratio",
  409. "type": float,
  410. "help": "Sets the unclip ratio for text detection.",
  411. },
  412. {
  413. "name": "--seal_rec_score_thresh",
  414. "type": float,
  415. "help": "Sets the score threshold for text recognition.",
  416. },
  417. {
  418. "name": "--text_det_limit_side_len",
  419. "type": int,
  420. "help": "Sets the side length limit for text detection.",
  421. },
  422. {
  423. "name": "--text_det_limit_type",
  424. "type": str,
  425. "help": "Sets the limit type for text detection.",
  426. },
  427. {
  428. "name": "--text_det_thresh",
  429. "type": float,
  430. "help": "Sets the threshold for text detection.",
  431. },
  432. {
  433. "name": "--text_det_box_thresh",
  434. "type": float,
  435. "help": "Sets the box threshold for text detection.",
  436. },
  437. {
  438. "name": "--text_det_unclip_ratio",
  439. "type": float,
  440. "help": "Sets the unclip ratio for text detection.",
  441. },
  442. {
  443. "name": "--text_rec_score_thresh",
  444. "type": float,
  445. "help": "Sets the score threshold for text recognition.",
  446. },
  447. ],
  448. "PP-StructureV3": [
  449. {
  450. "name": "--use_doc_orientation_classify",
  451. "type": bool,
  452. "help": "Determines whether to use document orientation classification",
  453. },
  454. {
  455. "name": "--use_doc_unwarping",
  456. "type": bool,
  457. "help": "Determines whether to use document unwarping",
  458. },
  459. {
  460. "name": "--use_general_ocr",
  461. "type": bool,
  462. "help": "Determines whether to use general ocr",
  463. },
  464. {
  465. "name": "--use_textline_orientation",
  466. "type": bool,
  467. "help": "Determines whether to consider text line orientation",
  468. },
  469. {
  470. "name": "--use_seal_recognition",
  471. "type": bool,
  472. "help": "Determines whether to use seal recognition",
  473. },
  474. {
  475. "name": "--use_table_recognition",
  476. "type": bool,
  477. "help": "Determines whether to use table recognition",
  478. },
  479. {
  480. "name": "--use_formula_recognition",
  481. "type": bool,
  482. "help": "Determines whether to use formula recognition",
  483. },
  484. {
  485. "name": "--layout_threshold",
  486. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  487. "help": "Determines confidence threshold for layout detection",
  488. },
  489. {
  490. "name": "--layout_nms",
  491. "type": bool,
  492. "help": "Determines whether to use non maximum suppression",
  493. },
  494. {
  495. "name": "--layout_unclip_ratio",
  496. "type": custom_type(
  497. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  498. ),
  499. "help": "Determines unclip ratio for layout detection boxes",
  500. },
  501. {
  502. "name": "--layout_merge_bboxes_mode",
  503. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  504. "help": "Determines merge mode for layout detection bboxes, 'union', 'large' or 'small'",
  505. },
  506. {
  507. "name": "--seal_det_limit_side_len",
  508. "type": int,
  509. "help": "Sets the side length limit for text detection.",
  510. },
  511. {
  512. "name": "--seal_det_limit_type",
  513. "type": str,
  514. "help": "Sets the limit type for text detection, 'min', 'max'.",
  515. },
  516. {
  517. "name": "--seal_det_thresh",
  518. "type": float,
  519. "help": "Sets the threshold for text detection.",
  520. },
  521. {
  522. "name": "--seal_det_box_thresh",
  523. "type": float,
  524. "help": "Sets the box threshold for text detection.",
  525. },
  526. {
  527. "name": "--seal_det_unclip_ratio",
  528. "type": float,
  529. "help": "Sets the unclip ratio for text detection.",
  530. },
  531. {
  532. "name": "--seal_rec_score_thresh",
  533. "type": float,
  534. "help": "Sets the score threshold for text recognition.",
  535. },
  536. {
  537. "name": "--text_det_limit_side_len",
  538. "type": int,
  539. "help": "Sets the side length limit for text detection.",
  540. },
  541. {
  542. "name": "--text_det_limit_type",
  543. "type": str,
  544. "help": "Sets the limit type for text detection.",
  545. },
  546. {
  547. "name": "--text_det_thresh",
  548. "type": float,
  549. "help": "Sets the threshold for text detection.",
  550. },
  551. {
  552. "name": "--text_det_box_thresh",
  553. "type": float,
  554. "help": "Sets the box threshold for text detection.",
  555. },
  556. {
  557. "name": "--text_det_unclip_ratio",
  558. "type": float,
  559. "help": "Sets the unclip ratio for text detection.",
  560. },
  561. {
  562. "name": "--text_rec_score_thresh",
  563. "type": float,
  564. "help": "Sets the score threshold for text recognition.",
  565. },
  566. {
  567. "name": "--use_table_cells_ocr_results",
  568. "type": bool,
  569. "help": "Determines whether to use cells OCR results",
  570. },
  571. {
  572. "name": "--use_e2e_wired_table_rec_model",
  573. "type": bool,
  574. "help": "Determines whether to use end-to-end wired table recognition model",
  575. },
  576. {
  577. "name": "--use_e2e_wireless_table_rec_model",
  578. "type": bool,
  579. "help": "Determines whether to use end-to-end wireless table recognition model",
  580. },
  581. ],
  582. "ts_forecast": None,
  583. "ts_anomaly_detection": None,
  584. "ts_classification": None,
  585. "formula_recognition": [
  586. {
  587. "name": "--use_layout_detection",
  588. "type": bool,
  589. "help": "Determines whether to use layout detection",
  590. },
  591. {
  592. "name": "--use_doc_orientation_classify",
  593. "type": bool,
  594. "help": "Determines whether to use document orientation classification",
  595. },
  596. {
  597. "name": "--use_doc_unwarping",
  598. "type": bool,
  599. "help": "Determines whether to use document unwarping",
  600. },
  601. {
  602. "name": "--layout_threshold",
  603. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  604. "help": "Sets the layout threshold for layout detection.",
  605. },
  606. {
  607. "name": "--layout_nms",
  608. "type": bool,
  609. "help": "Determines whether to use layout nms",
  610. },
  611. {
  612. "name": "--layout_unclip_ratio",
  613. "type": custom_type(
  614. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  615. ),
  616. "help": "Sets the layout unclip ratio for layout detection.",
  617. },
  618. {
  619. "name": "--layout_merge_bboxes_mode",
  620. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  621. "help": "Sets the layout merge bboxes mode for layout detection.",
  622. },
  623. ],
  624. "instance_segmentation": [
  625. {
  626. "name": "--threshold",
  627. "type": custom_type(Optional[float]),
  628. "help": "Sets the threshold for instance segmentation.",
  629. },
  630. ],
  631. "semantic_segmentation": [
  632. {
  633. "name": "--target_size",
  634. "type": custom_type(Optional[Union[int, Tuple[int, int], Literal[-1]]]),
  635. "help": "Sets the inference image resolution for semantic segmentation.",
  636. },
  637. ],
  638. "small_object_detection": [
  639. {
  640. "name": "--threshold",
  641. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  642. "help": "Sets the threshold for small object detection.",
  643. },
  644. ],
  645. "anomaly_detection": None,
  646. "video_classification": [
  647. {
  648. "name": "--topk",
  649. "type": int,
  650. "help": "Sets the Top-K value for video classification.",
  651. },
  652. ],
  653. "video_detection": [
  654. {
  655. "name": "--nms_thresh",
  656. "type": float,
  657. "help": "Sets the NMS threshold for video detection.",
  658. },
  659. {
  660. "name": "--score_thresh",
  661. "type": float,
  662. "help": "Sets the confidence threshold for video detection.",
  663. },
  664. ],
  665. "doc_preprocessor": [
  666. {
  667. "name": "--use_doc_orientation_classify",
  668. "type": bool,
  669. "help": "Determines whether to use document orientation classification.",
  670. },
  671. {
  672. "name": "--use_doc_unwarping",
  673. "type": bool,
  674. "help": "Determines whether to use document unwarping.",
  675. },
  676. ],
  677. "rotated_object_detection": [
  678. {
  679. "name": "--threshold",
  680. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  681. "help": "Sets the threshold for rotated object detection.",
  682. },
  683. ],
  684. "open_vocabulary_detection": [
  685. {
  686. "name": "--thresholds",
  687. "type": custom_type(Dict[str, float]),
  688. "help": "Sets the thresholds for open vocabulary detection.",
  689. },
  690. {
  691. "name": "--prompt",
  692. "type": str,
  693. "help": "Sets the prompt for open vocabulary detection.",
  694. },
  695. ],
  696. "open_vocabulary_segmentation": [
  697. {
  698. "name": "--prompt_type",
  699. "type": str,
  700. "help": "Sets the prompt type for open vocabulary segmentation.",
  701. },
  702. {
  703. "name": "--prompt",
  704. "type": custom_type(List[List[float]]),
  705. "help": "Sets the prompt for open vocabulary segmentation.",
  706. },
  707. ],
  708. "3d_bev_detection": None,
  709. "multilingual_speech_recognition": None,
  710. }