minerU-api.json 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. {
  2. "openapi": "3.1.0",
  3. "info": {
  4. "title": "FastAPI",
  5. "version": "0.1.0"
  6. },
  7. "paths": {
  8. "/file_parse": {
  9. "post": {
  10. "summary": "Parse Pdf",
  11. "operationId": "parse_pdf_file_parse_post",
  12. "requestBody": {
  13. "content": {
  14. "multipart/form-data": {
  15. "schema": {
  16. "$ref": "#/components/schemas/Body_parse_pdf_file_parse_post"
  17. }
  18. }
  19. },
  20. "required": true
  21. },
  22. "responses": {
  23. "200": {
  24. "description": "Successful Response",
  25. "content": {
  26. "application/json": {
  27. "schema": {}
  28. }
  29. }
  30. },
  31. "422": {
  32. "description": "Validation Error",
  33. "content": {
  34. "application/json": {
  35. "schema": {
  36. "$ref": "#/components/schemas/HTTPValidationError"
  37. }
  38. }
  39. }
  40. }
  41. }
  42. }
  43. }
  44. },
  45. "components": {
  46. "schemas": {
  47. "Body_parse_pdf_file_parse_post": {
  48. "properties": {
  49. "files": {
  50. "items": {
  51. "type": "string",
  52. "format": "binary"
  53. },
  54. "type": "array",
  55. "title": "Files",
  56. "description": "Upload pdf or image files for parsing"
  57. },
  58. "output_dir": {
  59. "type": "string",
  60. "title": "Output Dir",
  61. "description": "Output local directory",
  62. "default": "./output"
  63. },
  64. "lang_list": {
  65. "items": {
  66. "type": "string"
  67. },
  68. "type": "array",
  69. "title": "Lang List",
  70. "description": "(Adapted only for pipeline and hybrid backend) Input the languages in the PDF to improve OCR accuracy. Options:\n- ch: Chinese, English, Chinese Traditional.\n- ch_lite: Chinese, English, Chinese Traditional, Japanese.\n- ch_server: Chinese, English, Chinese Traditional, Japanese.\n- en: English.\n- korean: Korean, English.\n- japan: Chinese, English, Chinese Traditional, Japanese.\n- chinese_cht: Chinese, English, Chinese Traditional, Japanese.\n- ta: Tamil, English.\n- te: Telugu, English.\n- ka: Kannada.\n- th: Thai, English.\n- el: Greek, English.\n- latin: French, German, Afrikaans, Italian, Spanish, Bosnian, Portuguese, Czech, Welsh, Danish, Estonian, Irish, Croatian, Uzbek, Hungarian, Serbian (Latin), Indonesian, Occitan, Icelandic, Lithuanian, Maori, Malay, Dutch, Norwegian, Polish, Slovak, Slovenian, Albanian, Swedish, Swahili, Tagalog, Turkish, Latin, Azerbaijani, Kurdish, Latvian, Maltese, Pali, Romanian, Vietnamese, Finnish, Basque, Galician, Luxembourgish, Romansh, Catalan, Quechua.\n- arabic: Arabic, Persian, Uyghur, Urdu, Pashto, Kurdish, Sindhi, Balochi, English.\n- east_slavic: Russian, Belarusian, Ukrainian, English.\n- cyrillic: Russian, Belarusian, Ukrainian, Serbian (Cyrillic), Bulgarian, Mongolian, Abkhazian, Adyghe, Kabardian, Avar, Dargin, Ingush, Chechen, Lak, Lezgin, Tabasaran, Kazakh, Kyrgyz, Tajik, Macedonian, Tatar, Chuvash, Bashkir, Malian, Moldovan, Udmurt, Komi, Ossetian, Buryat, Kalmyk, Tuvan, Sakha, Karakalpak, English.\n- devanagari: Hindi, Marathi, Nepali, Bihari, Maithili, Angika, Bhojpuri, Magahi, Santali, Newari, Konkani, Sanskrit, Haryanvi, English.\n",
  71. "default": [
  72. "ch"
  73. ]
  74. },
  75. "backend": {
  76. "type": "string",
  77. "title": "Backend",
  78. "description": "The backend for parsing:\n- pipeline: More general, supports multiple languages, hallucination-free.\n- vlm-auto-engine: High accuracy via local computing power, supports Chinese and English documents only.\n- vlm-http-client: High accuracy via remote computing power (client suitable for OpenAI-compatible servers), supports Chinese and English documents only.\n- hybrid-auto-engine: Next-generation high-accuracy solution via local computing power, supports multiple languages.\n- hybrid-http-client: High accuracy via remote computing power but requires a little local computing power (client suitable for OpenAI-compatible servers), supports multiple languages.",
  79. "default": "hybrid-auto-engine"
  80. },
  81. "parse_method": {
  82. "type": "string",
  83. "title": "Parse Method",
  84. "description": "(Adapted only for pipeline and hybrid backend) The method for parsing PDF:\n- auto: Automatically determine the method based on the file type\n- txt: Use text extraction method\n- ocr: Use OCR method for image-based PDFs\n",
  85. "default": "auto"
  86. },
  87. "formula_enable": {
  88. "type": "boolean",
  89. "title": "Formula Enable",
  90. "description": "Enable formula parsing.",
  91. "default": true
  92. },
  93. "table_enable": {
  94. "type": "boolean",
  95. "title": "Table Enable",
  96. "description": "Enable table parsing.",
  97. "default": true
  98. },
  99. "server_url": {
  100. "anyOf": [
  101. {
  102. "type": "string"
  103. },
  104. {
  105. "type": "null"
  106. }
  107. ],
  108. "title": "Server Url",
  109. "description": "(Adapted only for <vlm/hybrid>-http-client backend) OpenAI-compatible server URL, e.g., http://127.0.0.1:30000"
  110. },
  111. "return_md": {
  112. "type": "boolean",
  113. "title": "Return Md",
  114. "description": "Return markdown content in response",
  115. "default": true
  116. },
  117. "return_middle_json": {
  118. "type": "boolean",
  119. "title": "Return Middle Json",
  120. "description": "Return middle JSON in response",
  121. "default": false
  122. },
  123. "return_model_output": {
  124. "type": "boolean",
  125. "title": "Return Model Output",
  126. "description": "Return model output JSON in response",
  127. "default": false
  128. },
  129. "return_content_list": {
  130. "type": "boolean",
  131. "title": "Return Content List",
  132. "description": "Return content list JSON in response",
  133. "default": false
  134. },
  135. "return_images": {
  136. "type": "boolean",
  137. "title": "Return Images",
  138. "description": "Return extracted images in response",
  139. "default": false
  140. },
  141. "response_format_zip": {
  142. "type": "boolean",
  143. "title": "Response Format Zip",
  144. "description": "Return results as a ZIP file instead of JSON",
  145. "default": false
  146. },
  147. "start_page_id": {
  148. "type": "integer",
  149. "title": "Start Page Id",
  150. "description": "The starting page for PDF parsing, beginning from 0",
  151. "default": 0
  152. },
  153. "end_page_id": {
  154. "type": "integer",
  155. "title": "End Page Id",
  156. "description": "The ending page for PDF parsing; page numbering begins from 0",
  157. "default": 99999
  158. }
  159. },
  160. "type": "object",
  161. "required": [
  162. "files"
  163. ],
  164. "title": "Body_parse_pdf_file_parse_post"
  165. },
  166. "HTTPValidationError": {
  167. "properties": {
  168. "detail": {
  169. "items": {
  170. "$ref": "#/components/schemas/ValidationError"
  171. },
  172. "type": "array",
  173. "title": "Detail"
  174. }
  175. },
  176. "type": "object",
  177. "title": "HTTPValidationError"
  178. },
  179. "ValidationError": {
  180. "properties": {
  181. "loc": {
  182. "items": {
  183. "anyOf": [
  184. {
  185. "type": "string"
  186. },
  187. {
  188. "type": "integer"
  189. }
  190. ]
  191. },
  192. "type": "array",
  193. "title": "Location"
  194. },
  195. "msg": {
  196. "type": "string",
  197. "title": "Message"
  198. },
  199. "type": {
  200. "type": "string",
  201. "title": "Error Type"
  202. }
  203. },
  204. "type": "object",
  205. "required": [
  206. "loc",
  207. "msg",
  208. "type"
  209. ],
  210. "title": "ValidationError"
  211. }
  212. }
  213. }
  214. }