# PP-ChatOCRv3-doc.yaml
#
# Configuration for the PP-ChatOCRv3-doc pipeline. It declares the LLM and
# prompt sub-modules, plus a layout-parsing sub-pipeline that itself nests
# document preprocessing, general OCR, table recognition and seal recognition.
#
# NOTE(review): the nesting below was reconstructed from the key names
# (SubModules / SubPipelines / module_name / pipeline_name) because the text
# this was recovered from had lost its indentation. Confirm against the
# schema expected by the consumer before relying on it.
---
pipeline_name: PP-ChatOCRv3-doc

# Presumably toggles the LayoutParser sub-pipeline declared under
# SubPipelines -- confirm against the consumer.
use_layout_parser: true

SubModules:
  # Chat model endpoint settings.
  LLM_Chat:
    module_name: chat_bot
    model_name: ernie-3.5-8k
    base_url: "https://qianfan.baidubce.com/v2"
    api_type: openai
    # Placeholder value; do not commit a real key to version control.
    api_key: "api_key"  # Set this to a real API key

  # Embedding model endpoint settings used by the retriever module.
  LLM_Retriever:
    module_name: retriever
    model_name: embedding-v1
    base_url: "https://qianfan.baidubce.com/v2"
    api_type: qianfan
    # Placeholder value; do not commit a real key to version control.
    api_key: "api_key"  # Set this to a real API key

  # NOTE(review): the spelling "Engneering" (here and in module_name) is kept
  # as-is; presumably the consumer looks these names up verbatim -- confirm
  # before renaming.
  PromptEngneering:
    # Prompt for key-information extraction from plain OCR text.
    KIE_CommonText:
      module_name: prompt_engneering
      task_type: text_kie_prompt_v1

      # Multi-line single-quoted scalar: each line break folds to one space.
      task_description: '你现在的任务是从OCR文字识别的结果中提取关键词列表中每一项对应的关键信息。
        OCR的文字识别结果使用```符号包围,包含所识别出来的文字,顺序在原始图片中从左至右、从上至下。
        我指定的关键词列表使用[]符号包围。请注意OCR的文字识别结果可能存在长句子换行被切断、不合理的分词、
        文字被错误合并等问题,你需要结合上下文语义进行综合判断,以抽取准确的关键信息。'

      # Explicit null instead of a bare empty value.
      rules_str: null

      output_format: '在返回结果时使用JSON格式,包含多个key-value对,key值为我指定的问题,value值为该问题对应的答案。
        如果认为OCR识别结果中,对于问题key,没有答案,则将value赋值为"未知"。请只输出json格式的结果,
        并做json格式校验后返回,不要包含其它多余文字!'

      # No few-shot examples are configured (explicit null).
      few_shot_demo_text_content: null
      few_shot_demo_key_value_list: null

    # Prompt for key-information extraction from recognized table content.
    KIE_Table:
      module_name: prompt_engneering
      task_type: table_kie_prompt_v1

      # Multi-line single-quoted scalar: each line break folds to one space.
      task_description: '你现在的任务是从输入的表格内容中提取关键词列表中每一项对应的关键信息,
        表格内容用```符号包围,我指定的关键词列表使用[]符号包围。你需要结合上下文语义进行综合判断,以抽取准确的关键信息。'

      # Explicit null instead of a bare empty value.
      rules_str: null

      output_format: '在返回结果时使用JSON格式,包含多个key-value对,key值为我指定的关键词,value值为所抽取的结果。
        如果认为表格识别结果中没有关键词key对应的value,则将value赋值为"未知"。请只输出json格式的结果,
        并做json格式校验后返回,不要包含其它多余文字!'

      # No few-shot examples are configured (explicit null).
      few_shot_demo_text_content: null
      few_shot_demo_key_value_list: null

SubPipelines:
  LayoutParser:
    pipeline_name: layout_parsing

    # Feature switches for the nested sub-pipelines declared below; formula
    # recognition is switched off and has no sub-pipeline entry here.
    use_doc_preprocessor: true
    use_general_ocr: true
    use_seal_recognition: true
    use_table_recognition: true
    use_formula_recognition: false

    SubModules:
      LayoutDetection:
        module_name: layout_detection
        model_name: RT-DETR-H_layout_3cls
        model_dir: null

    SubPipelines:
      # Orientation classification and unwarping, both enabled.
      DocPreprocessor:
        pipeline_name: doc_preprocessor
        use_doc_orientation_classify: true
        use_doc_unwarping: true
        SubModules:
          DocOrientationClassify:
            module_name: doc_text_orientation
            model_name: PP-LCNet_x1_0_doc_ori
            model_dir: null
          DocUnwarping:
            module_name: image_unwarping
            model_name: UVDoc
            model_dir: null

      # General text OCR: detection followed by recognition.
      GeneralOCR:
        pipeline_name: OCR
        text_type: general
        use_doc_preprocessor: false
        use_textline_orientation: false
        SubModules:
          TextDetection:
            module_name: text_detection
            model_name: PP-OCRv4_server_det
            model_dir: null
            # Resize limits and thresholds; exact semantics are defined by
            # the text_detection module.
            limit_side_len: 960
            limit_type: max
            max_side_limit: 4000
            thresh: 0.3
            box_thresh: 0.6
            unclip_ratio: 1.5
          TextRecognition:
            module_name: text_recognition
            model_name: PP-OCRv4_server_rec
            model_dir: null
            batch_size: 6
            score_thresh: 0

      # Table structure recognition; its own layout detection, document
      # preprocessing and OCR model switches are all off.
      TableRecognition:
        pipeline_name: table_recognition
        use_layout_detection: false
        use_doc_preprocessor: false
        use_ocr_model: false
        SubModules:
          TableStructureRecognition:
            module_name: table_structure_recognition
            model_name: SLANet_plus
            model_dir: null

      # Seal recognition, which wraps its own OCR sub-pipeline.
      SealRecognition:
        pipeline_name: seal_recognition
        use_layout_detection: false
        use_doc_preprocessor: false
        SubPipelines:
          SealOCR:
            pipeline_name: OCR
            text_type: seal
            use_doc_preprocessor: false
            use_textline_orientation: false
            SubModules:
              TextDetection:
                module_name: seal_text_detection
                model_name: PP-OCRv4_server_seal_det
                model_dir: null
                # Resize limits and thresholds; exact semantics are defined
                # by the seal_text_detection module.
                limit_side_len: 736
                limit_type: min
                max_side_limit: 4000
                thresh: 0.2
                box_thresh: 0.6
                unclip_ratio: 0.5
              TextRecognition:
                module_name: text_recognition
                model_name: PP-OCRv4_server_rec
                model_dir: null
                batch_size: 1
                score_thresh: 0