# enum_class.py
  1. class BlockType:
  2. IMAGE = 'image'
  3. TABLE = 'table'
  4. IMAGE_BODY = 'image_body'
  5. TABLE_BODY = 'table_body'
  6. IMAGE_CAPTION = 'image_caption'
  7. TABLE_CAPTION = 'table_caption'
  8. IMAGE_FOOTNOTE = 'image_footnote'
  9. TABLE_FOOTNOTE = 'table_footnote'
  10. TEXT = 'text'
  11. TITLE = 'title'
  12. INTERLINE_EQUATION = 'interline_equation'
  13. LIST = 'list'
  14. INDEX = 'index'
  15. DISCARDED = 'discarded'
  16. # Added in vlm 2.5
  17. CODE = "code"
  18. CODE_BODY = "code_body"
  19. CODE_CAPTION = "code_caption"
  20. ALGORITHM = "algorithm"
  21. REF_TEXT = "ref_text"
  22. PHONETIC = "phonetic"
  23. HEADER = "header"
  24. FOOTER = "footer"
  25. PAGE_NUMBER = "page_number"
  26. ASIDE_TEXT = "aside_text"
  27. PAGE_FOOTNOTE = "page_footnote"
  28. class ContentType:
  29. IMAGE = 'image'
  30. TABLE = 'table'
  31. TEXT = 'text'
  32. INTERLINE_EQUATION = 'interline_equation'
  33. INLINE_EQUATION = 'inline_equation'
  34. EQUATION = 'equation'
  35. CODE = 'code'
  36. class CategoryId:
  37. Title = 0
  38. Text = 1
  39. Abandon = 2
  40. ImageBody = 3
  41. ImageCaption = 4
  42. TableBody = 5
  43. TableCaption = 6
  44. TableFootnote = 7
  45. InterlineEquation_Layout = 8
  46. InterlineEquationNumber_Layout = 9
  47. InlineEquation = 13
  48. InterlineEquation_YOLO = 14
  49. OcrText = 15
  50. LowScoreText = 16
  51. ImageFootnote = 101
  52. class MakeMode:
  53. MM_MD = 'mm_markdown'
  54. NLP_MD = 'nlp_markdown'
  55. CONTENT_LIST = 'content_list'
  56. class ModelPath:
  57. vlm_root_hf = "opendatalab/MinerU2.5-2509-1.2B"
  58. vlm_root_modelscope = "OpenDataLab/MinerU2.5-2509-1.2B"
  59. pipeline_root_modelscope = "OpenDataLab/PDF-Extract-Kit-1.0"
  60. pipeline_root_hf = "opendatalab/PDF-Extract-Kit-1.0"
  61. doclayout_yolo = "models/Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt"
  62. yolo_v8_mfd = "models/MFD/YOLO/yolo_v8_ft.pt"
  63. unimernet_small = "models/MFR/unimernet_hf_small_2503"
  64. pytorch_paddle = "models/OCR/paddleocr_torch"
  65. layout_reader = "models/ReadingOrder/layout_reader"
  66. slanet_plus = "models/TabRec/SlanetPlus/slanet-plus.onnx"
  67. unet_structure = "models/TabRec/UnetStructure/unet.onnx"
  68. paddle_table_cls = "models/TabCls/paddle_table_cls/PP-LCNet_x1_0_table_cls.onnx"
  69. paddle_orientation_classification = "models/OriCls/paddle_orientation_classification/PP-LCNet_x1_0_doc_ori.onnx"
  70. class SplitFlag:
  71. CROSS_PAGE = 'cross_page'
  72. LINES_DELETED = 'lines_deleted'
  73. class ImageType:
  74. PIL = 'pil_img'
  75. BASE64 = 'base64_img'