# pipeline_arguments.py
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ast import literal_eval
from typing import Dict, List, Literal, Optional, Tuple, Union

from pydantic import TypeAdapter, ValidationError
  17. def custom_type(cli_expected_type):
  18. """Create validator for CLI input conversion and type checking"""
  19. def validator(cli_input: str) -> cli_expected_type:
  20. try:
  21. parsed = literal_eval(cli_input)
  22. except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as exc:
  23. err = f"""Malformed input:
  24. - Input: {cli_input!r}
  25. - Error: {exc}"""
  26. raise ValueError(err) from exc
  27. try:
  28. return TypeAdapter(cli_expected_type).validate_python(parsed)
  29. except ValidationError as exc:
  30. err = f"""Invalid input type:
  31. - Expected: {cli_expected_type}
  32. - Received: {cli_input!r}
  33. """
  34. raise ValueError(err) from exc
  35. return validator
  36. PIPELINE_ARGUMENTS = {
  37. "OCR": [
  38. {
  39. "name": "--use_doc_orientation_classify",
  40. "type": bool,
  41. "help": "Determines whether to use document orientation classification",
  42. },
  43. {
  44. "name": "--use_doc_unwarping",
  45. "type": bool,
  46. "help": "Determines whether to use document unwarping",
  47. },
  48. {
  49. "name": "--use_textline_orientation",
  50. "type": bool,
  51. "help": "Determines whether to consider text line orientation",
  52. },
  53. {
  54. "name": "--text_det_limit_side_len",
  55. "type": int,
  56. "help": "Sets the side length limit for text detection.",
  57. },
  58. {
  59. "name": "--text_det_limit_type",
  60. "type": str,
  61. "help": "Sets the limit type for text detection.",
  62. },
  63. {
  64. "name": "--text_det_thresh",
  65. "type": float,
  66. "help": "Sets the threshold for text detection.",
  67. },
  68. {
  69. "name": "--text_det_box_thresh",
  70. "type": float,
  71. "help": "Sets the box threshold for text detection.",
  72. },
  73. {
  74. "name": "--text_det_unclip_ratio",
  75. "type": float,
  76. "help": "Sets the unclip ratio for text detection.",
  77. },
  78. {
  79. "name": "--text_rec_score_thresh",
  80. "type": float,
  81. "help": "Sets the score threshold for text recognition.",
  82. },
  83. {
  84. "name": "--return_word_box",
  85. "type": bool,
  86. "help": "Determines whether to return word box",
  87. },
  88. ],
  89. "object_detection": [
  90. {
  91. "name": "--threshold",
  92. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  93. "help": "Sets the threshold for object detection.",
  94. },
  95. ],
  96. "image_classification": [
  97. {
  98. "name": "--topk",
  99. "type": int,
  100. "help": "Sets the Top-K value for image classification.",
  101. },
  102. ],
  103. "image_multilabel_classification": [
  104. {
  105. "name": "--threshold",
  106. "type": float,
  107. "help": "Sets the threshold for image multilabel classification.",
  108. },
  109. ],
  110. "pedestrian_attribute_recognition": [
  111. {
  112. "name": "--det_threshold",
  113. "type": float,
  114. "help": "Sets the threshold for human detection.",
  115. },
  116. {
  117. "name": "--cls_threshold",
  118. "type": float,
  119. "help": "Sets the threshold for pedestrian attribute recognition.",
  120. },
  121. ],
  122. "vehicle_attribute_recognition": [
  123. {
  124. "name": "--det_threshold",
  125. "type": float,
  126. "help": "Sets the threshold for vehicle detection.",
  127. },
  128. {
  129. "name": "--cls_threshold",
  130. "type": float,
  131. "help": "Sets the threshold for vehicle attribute recognition.",
  132. },
  133. ],
  134. "human_keypoint_detection": [
  135. {
  136. "name": "--det_threshold",
  137. "type": custom_type(Optional[float]),
  138. "help": "Sets the threshold for human detection.",
  139. },
  140. ],
  141. "table_recognition": [
  142. {
  143. "name": "--use_table_cells_ocr_results",
  144. "type": bool,
  145. "help": "Determines whether to use cells OCR results",
  146. },
  147. {
  148. "name": "--use_doc_orientation_classify",
  149. "type": bool,
  150. "help": "Determines whether to use document preprocessing",
  151. },
  152. {
  153. "name": "--use_doc_unwarping",
  154. "type": bool,
  155. "help": "Determines whether to use document unwarping",
  156. },
  157. {
  158. "name": "--use_layout_detection",
  159. "type": bool,
  160. "help": "Determines whether to use document layout detection",
  161. },
  162. {
  163. "name": "--use_ocr_model",
  164. "type": bool,
  165. "help": "Determines whether to use OCR",
  166. },
  167. {
  168. "name": "--text_det_limit_side_len",
  169. "type": int,
  170. "help": "Sets the side length limit for text detection.",
  171. },
  172. {
  173. "name": "--text_det_limit_type",
  174. "type": str,
  175. "help": "Sets the limit type for text detection.",
  176. },
  177. {
  178. "name": "--text_det_thresh",
  179. "type": float,
  180. "help": "Sets the threshold for text detection.",
  181. },
  182. {
  183. "name": "--text_det_box_thresh",
  184. "type": float,
  185. "help": "Sets the box threshold for text detection.",
  186. },
  187. {
  188. "name": "--text_det_unclip_ratio",
  189. "type": float,
  190. "help": "Sets the unclip ratio for text detection.",
  191. },
  192. {
  193. "name": "--text_rec_score_thresh",
  194. "type": float,
  195. "help": "Sets the score threshold for text recognition.",
  196. },
  197. ],
  198. "table_recognition_v2": [
  199. {
  200. "name": "--use_table_cells_ocr_results",
  201. "type": bool,
  202. "help": "Determines whether to use cells OCR results",
  203. },
  204. {
  205. "name": "--use_e2e_wired_table_rec_model",
  206. "type": bool,
  207. "help": "Determines whether to use end-to-end wired table recognition model",
  208. },
  209. {
  210. "name": "--use_e2e_wireless_table_rec_model",
  211. "type": bool,
  212. "help": "Determines whether to use end-to-end wireless table recognition model",
  213. },
  214. {
  215. "name": "--use_doc_orientation_classify",
  216. "type": bool,
  217. "help": "Determines whether to use document preprocessing",
  218. },
  219. {
  220. "name": "--use_doc_unwarping",
  221. "type": bool,
  222. "help": "Determines whether to use document unwarping",
  223. },
  224. {
  225. "name": "--use_layout_detection",
  226. "type": bool,
  227. "help": "Determines whether to use document layout detection",
  228. },
  229. {
  230. "name": "--use_ocr_model",
  231. "type": bool,
  232. "help": "Determines whether to use OCR",
  233. },
  234. {
  235. "name": "--text_det_limit_side_len",
  236. "type": int,
  237. "help": "Sets the side length limit for text detection.",
  238. },
  239. {
  240. "name": "--text_det_limit_type",
  241. "type": str,
  242. "help": "Sets the limit type for text detection.",
  243. },
  244. {
  245. "name": "--text_det_thresh",
  246. "type": float,
  247. "help": "Sets the threshold for text detection.",
  248. },
  249. {
  250. "name": "--text_det_box_thresh",
  251. "type": float,
  252. "help": "Sets the box threshold for text detection.",
  253. },
  254. {
  255. "name": "--text_det_unclip_ratio",
  256. "type": float,
  257. "help": "Sets the unclip ratio for text detection.",
  258. },
  259. {
  260. "name": "--text_rec_score_thresh",
  261. "type": float,
  262. "help": "Sets the score threshold for text recognition.",
  263. },
  264. ],
  265. "seal_recognition": [
  266. {
  267. "name": "--use_doc_orientation_classify",
  268. "type": bool,
  269. "help": "Determines whether to use document preprocessing",
  270. },
  271. {
  272. "name": "--use_doc_unwarping",
  273. "type": bool,
  274. "help": "Determines whether to use document unwarping",
  275. },
  276. {
  277. "name": "--use_layout_detection",
  278. "type": bool,
  279. "help": "Determines whether to use document layout detection",
  280. },
  281. {
  282. "name": "--layout_threshold",
  283. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  284. "help": "Determines confidence threshold for layout detection",
  285. },
  286. {
  287. "name": "--layout_nms",
  288. "type": bool,
  289. "help": "Determines whether to use non maximum suppression",
  290. },
  291. {
  292. "name": "--layout_unclip_ratio",
  293. "type": custom_type(
  294. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  295. ),
  296. "help": "Determines unclip ratio for layout detection boxes",
  297. },
  298. {
  299. "name": "--layout_merge_bboxes_mode",
  300. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  301. "help": "Determines merge mode for layout detection bboxes, 'union', 'large' or 'small'",
  302. },
  303. {
  304. "name": "--seal_det_limit_side_len",
  305. "type": int,
  306. "help": "Sets the side length limit for text detection.",
  307. },
  308. {
  309. "name": "--seal_det_limit_type",
  310. "type": str,
  311. "help": "Sets the limit type for text detection, 'min', 'max'.",
  312. },
  313. {
  314. "name": "--seal_det_thresh",
  315. "type": float,
  316. "help": "Sets the threshold for text detection.",
  317. },
  318. {
  319. "name": "--seal_det_box_thresh",
  320. "type": float,
  321. "help": "Sets the box threshold for text detection.",
  322. },
  323. {
  324. "name": "--seal_det_unclip_ratio",
  325. "type": float,
  326. "help": "Sets the unclip ratio for text detection.",
  327. },
  328. {
  329. "name": "--seal_rec_score_thresh",
  330. "type": float,
  331. "help": "Sets the score threshold for text recognition.",
  332. },
  333. ],
  334. "layout_parsing": [
  335. {
  336. "name": "--use_doc_orientation_classify",
  337. "type": bool,
  338. "help": "Determines whether to use document orientation classification",
  339. },
  340. {
  341. "name": "--use_doc_unwarping",
  342. "type": bool,
  343. "help": "Determines whether to use document unwarping",
  344. },
  345. {
  346. "name": "--use_general_ocr",
  347. "type": bool,
  348. "help": "Determines whether to use general ocr",
  349. },
  350. {
  351. "name": "--use_textline_orientation",
  352. "type": bool,
  353. "help": "Determines whether to consider text line orientation",
  354. },
  355. {
  356. "name": "--use_seal_recognition",
  357. "type": bool,
  358. "help": "Determines whether to use seal recognition",
  359. },
  360. {
  361. "name": "--use_table_recognition",
  362. "type": bool,
  363. "help": "Determines whether to use table recognition",
  364. },
  365. {
  366. "name": "--use_formula_recognition",
  367. "type": bool,
  368. "help": "Determines whether to use formula recognition",
  369. },
  370. {
  371. "name": "--layout_threshold",
  372. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  373. "help": "Determines confidence threshold for layout detection",
  374. },
  375. {
  376. "name": "--layout_nms",
  377. "type": bool,
  378. "help": "Determines whether to use non maximum suppression",
  379. },
  380. {
  381. "name": "--layout_unclip_ratio",
  382. "type": custom_type(
  383. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  384. ),
  385. "help": "Determines unclip ratio for layout detection boxes",
  386. },
  387. {
  388. "name": "--layout_merge_bboxes_mode",
  389. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  390. "help": "Determines merge mode for layout detection bboxes, 'union', 'large' or 'small'",
  391. },
  392. {
  393. "name": "--seal_det_limit_side_len",
  394. "type": int,
  395. "help": "Sets the side length limit for text detection.",
  396. },
  397. {
  398. "name": "--seal_det_limit_type",
  399. "type": str,
  400. "help": "Sets the limit type for text detection, 'min', 'max'.",
  401. },
  402. {
  403. "name": "--seal_det_thresh",
  404. "type": float,
  405. "help": "Sets the threshold for text detection.",
  406. },
  407. {
  408. "name": "--seal_det_box_thresh",
  409. "type": float,
  410. "help": "Sets the box threshold for text detection.",
  411. },
  412. {
  413. "name": "--seal_det_unclip_ratio",
  414. "type": float,
  415. "help": "Sets the unclip ratio for text detection.",
  416. },
  417. {
  418. "name": "--seal_rec_score_thresh",
  419. "type": float,
  420. "help": "Sets the score threshold for text recognition.",
  421. },
  422. {
  423. "name": "--text_det_limit_side_len",
  424. "type": int,
  425. "help": "Sets the side length limit for text detection.",
  426. },
  427. {
  428. "name": "--text_det_limit_type",
  429. "type": str,
  430. "help": "Sets the limit type for text detection.",
  431. },
  432. {
  433. "name": "--text_det_thresh",
  434. "type": float,
  435. "help": "Sets the threshold for text detection.",
  436. },
  437. {
  438. "name": "--text_det_box_thresh",
  439. "type": float,
  440. "help": "Sets the box threshold for text detection.",
  441. },
  442. {
  443. "name": "--text_det_unclip_ratio",
  444. "type": float,
  445. "help": "Sets the unclip ratio for text detection.",
  446. },
  447. {
  448. "name": "--text_rec_score_thresh",
  449. "type": float,
  450. "help": "Sets the score threshold for text recognition.",
  451. },
  452. ],
  453. "PP-StructureV3": [
  454. {
  455. "name": "--use_doc_orientation_classify",
  456. "type": bool,
  457. "help": "Determines whether to use document orientation classification",
  458. },
  459. {
  460. "name": "--use_doc_unwarping",
  461. "type": bool,
  462. "help": "Determines whether to use document unwarping",
  463. },
  464. {
  465. "name": "--use_general_ocr",
  466. "type": bool,
  467. "help": "Determines whether to use general ocr",
  468. },
  469. {
  470. "name": "--use_textline_orientation",
  471. "type": bool,
  472. "help": "Determines whether to consider text line orientation",
  473. },
  474. {
  475. "name": "--use_seal_recognition",
  476. "type": bool,
  477. "help": "Determines whether to use seal recognition",
  478. },
  479. {
  480. "name": "--use_table_recognition",
  481. "type": bool,
  482. "help": "Determines whether to use table recognition",
  483. },
  484. {
  485. "name": "--use_formula_recognition",
  486. "type": bool,
  487. "help": "Determines whether to use formula recognition",
  488. },
  489. {
  490. "name": "--layout_threshold",
  491. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  492. "help": "Determines confidence threshold for layout detection",
  493. },
  494. {
  495. "name": "--layout_nms",
  496. "type": bool,
  497. "help": "Determines whether to use non maximum suppression",
  498. },
  499. {
  500. "name": "--layout_unclip_ratio",
  501. "type": custom_type(
  502. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  503. ),
  504. "help": "Determines unclip ratio for layout detection boxes",
  505. },
  506. {
  507. "name": "--layout_merge_bboxes_mode",
  508. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  509. "help": "Determines merge mode for layout detection bboxes, 'union', 'large' or 'small'",
  510. },
  511. {
  512. "name": "--seal_det_limit_side_len",
  513. "type": int,
  514. "help": "Sets the side length limit for text detection.",
  515. },
  516. {
  517. "name": "--seal_det_limit_type",
  518. "type": str,
  519. "help": "Sets the limit type for text detection, 'min', 'max'.",
  520. },
  521. {
  522. "name": "--seal_det_thresh",
  523. "type": float,
  524. "help": "Sets the threshold for text detection.",
  525. },
  526. {
  527. "name": "--seal_det_box_thresh",
  528. "type": float,
  529. "help": "Sets the box threshold for text detection.",
  530. },
  531. {
  532. "name": "--seal_det_unclip_ratio",
  533. "type": float,
  534. "help": "Sets the unclip ratio for text detection.",
  535. },
  536. {
  537. "name": "--seal_rec_score_thresh",
  538. "type": float,
  539. "help": "Sets the score threshold for text recognition.",
  540. },
  541. {
  542. "name": "--text_det_limit_side_len",
  543. "type": int,
  544. "help": "Sets the side length limit for text detection.",
  545. },
  546. {
  547. "name": "--text_det_limit_type",
  548. "type": str,
  549. "help": "Sets the limit type for text detection.",
  550. },
  551. {
  552. "name": "--text_det_thresh",
  553. "type": float,
  554. "help": "Sets the threshold for text detection.",
  555. },
  556. {
  557. "name": "--text_det_box_thresh",
  558. "type": float,
  559. "help": "Sets the box threshold for text detection.",
  560. },
  561. {
  562. "name": "--text_det_unclip_ratio",
  563. "type": float,
  564. "help": "Sets the unclip ratio for text detection.",
  565. },
  566. {
  567. "name": "--text_rec_score_thresh",
  568. "type": float,
  569. "help": "Sets the score threshold for text recognition.",
  570. },
  571. {
  572. "name": "--use_table_cells_ocr_results",
  573. "type": bool,
  574. "help": "Determines whether to use cells OCR results",
  575. },
  576. {
  577. "name": "--use_e2e_wired_table_rec_model",
  578. "type": bool,
  579. "help": "Determines whether to use end-to-end wired table recognition model",
  580. },
  581. {
  582. "name": "--use_e2e_wireless_table_rec_model",
  583. "type": bool,
  584. "help": "Determines whether to use end-to-end wireless table recognition model",
  585. },
  586. ],
  587. "PaddleOCR-VL": [
  588. {
  589. "name": "--use_doc_orientation_classify",
  590. "type": bool,
  591. "help": "Determines whether to use document orientation classification",
  592. },
  593. {
  594. "name": "--use_doc_unwarping",
  595. "type": bool,
  596. "help": "Determines whether to use document unwarping",
  597. },
  598. {
  599. "name": "--use_layout_detection",
  600. "type": bool,
  601. "help": "Determines whether to use layout detection",
  602. },
  603. {
  604. "name": "--use_chart_recognition",
  605. "type": bool,
  606. "help": "Determines whether to use document chart recognition",
  607. },
  608. {
  609. "name": "--layout_threshold",
  610. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  611. "help": "Determines confidence threshold for layout detection",
  612. },
  613. {
  614. "name": "--layout_nms",
  615. "type": bool,
  616. "help": "Determines whether to use non maximum suppression",
  617. },
  618. {
  619. "name": "--layout_unclip_ratio",
  620. "type": custom_type(
  621. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  622. ),
  623. "help": "Determines unclip ratio for layout detection boxes",
  624. },
  625. {
  626. "name": "--layout_merge_bboxes_mode",
  627. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  628. "help": "Determines merge mode for layout detection bboxes, 'union', 'large' or 'small'",
  629. },
  630. {
  631. "name": "--use_queues",
  632. "type": bool,
  633. "help": "Determines whether to use queues",
  634. },
  635. {
  636. "name": "--prompt_label",
  637. "type": custom_type(Optional[str]),
  638. "help": "Sets the prompt label when not using layout detection",
  639. },
  640. {
  641. "name": "--format_block_content",
  642. "type": bool,
  643. "help": "Determines whether to format block content",
  644. },
  645. {
  646. "name": "--repetition_penalty",
  647. "type": custom_type(Optional[float]),
  648. "help": "",
  649. },
  650. {
  651. "name": "--temperature",
  652. "type": custom_type(Optional[float]),
  653. "help": "Temperature parameter for VLLM model.",
  654. },
  655. {
  656. "name": "--top_p",
  657. "type": custom_type(Optional[float]),
  658. "help": "Top-p parameter for VLLM model.",
  659. },
  660. {
  661. "name": "--min_pixels",
  662. "type": custom_type(Optional[int]),
  663. "help": "Sets the minimum pixels for VLLM model.",
  664. },
  665. {
  666. "name": "--max_pixels",
  667. "type": custom_type(Optional[int]),
  668. "help": "Sets the max_pixels pixels for VLLM model.",
  669. },
  670. ],
  671. "ts_forecast": None,
  672. "ts_anomaly_detection": None,
  673. "ts_classification": None,
  674. "formula_recognition": [
  675. {
  676. "name": "--use_layout_detection",
  677. "type": bool,
  678. "help": "Determines whether to use layout detection",
  679. },
  680. {
  681. "name": "--use_doc_orientation_classify",
  682. "type": bool,
  683. "help": "Determines whether to use document orientation classification",
  684. },
  685. {
  686. "name": "--use_doc_unwarping",
  687. "type": bool,
  688. "help": "Determines whether to use document unwarping",
  689. },
  690. {
  691. "name": "--layout_threshold",
  692. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  693. "help": "Sets the layout threshold for layout detection.",
  694. },
  695. {
  696. "name": "--layout_nms",
  697. "type": bool,
  698. "help": "Determines whether to use layout nms",
  699. },
  700. {
  701. "name": "--layout_unclip_ratio",
  702. "type": custom_type(
  703. Optional[Union[float, Tuple[float, float], Dict[int, Tuple]]]
  704. ),
  705. "help": "Sets the layout unclip ratio for layout detection.",
  706. },
  707. {
  708. "name": "--layout_merge_bboxes_mode",
  709. "type": custom_type(Optional[Union[str, Dict[int, str]]]),
  710. "help": "Sets the layout merge bboxes mode for layout detection.",
  711. },
  712. ],
  713. "instance_segmentation": [
  714. {
  715. "name": "--threshold",
  716. "type": custom_type(Optional[float]),
  717. "help": "Sets the threshold for instance segmentation.",
  718. },
  719. ],
  720. "semantic_segmentation": [
  721. {
  722. "name": "--target_size",
  723. "type": custom_type(Optional[Union[int, Tuple[int, int], Literal[-1]]]),
  724. "help": "Sets the inference image resolution for semantic segmentation.",
  725. },
  726. ],
  727. "small_object_detection": [
  728. {
  729. "name": "--threshold",
  730. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  731. "help": "Sets the threshold for small object detection.",
  732. },
  733. ],
  734. "anomaly_detection": None,
  735. "video_classification": [
  736. {
  737. "name": "--topk",
  738. "type": int,
  739. "help": "Sets the Top-K value for video classification.",
  740. },
  741. ],
  742. "video_detection": [
  743. {
  744. "name": "--nms_thresh",
  745. "type": float,
  746. "help": "Sets the NMS threshold for video detection.",
  747. },
  748. {
  749. "name": "--score_thresh",
  750. "type": float,
  751. "help": "Sets the confidence threshold for video detection.",
  752. },
  753. ],
  754. "doc_preprocessor": [
  755. {
  756. "name": "--use_doc_orientation_classify",
  757. "type": bool,
  758. "help": "Determines whether to use document orientation classification.",
  759. },
  760. {
  761. "name": "--use_doc_unwarping",
  762. "type": bool,
  763. "help": "Determines whether to use document unwarping.",
  764. },
  765. ],
  766. "rotated_object_detection": [
  767. {
  768. "name": "--threshold",
  769. "type": custom_type(Optional[Union[float, Dict[int, float]]]),
  770. "help": "Sets the threshold for rotated object detection.",
  771. },
  772. ],
  773. "open_vocabulary_detection": [
  774. {
  775. "name": "--thresholds",
  776. "type": custom_type(Dict[str, float]),
  777. "help": "Sets the thresholds for open vocabulary detection.",
  778. },
  779. {
  780. "name": "--prompt",
  781. "type": str,
  782. "help": "Sets the prompt for open vocabulary detection.",
  783. },
  784. ],
  785. "open_vocabulary_segmentation": [
  786. {
  787. "name": "--prompt_type",
  788. "type": str,
  789. "help": "Sets the prompt type for open vocabulary segmentation.",
  790. },
  791. {
  792. "name": "--prompt",
  793. "type": custom_type(List[List[float]]),
  794. "help": "Sets the prompt for open vocabulary segmentation.",
  795. },
  796. ],
  797. "3d_bev_detection": None,
  798. "multilingual_speech_recognition": None,
  799. }