pp_chatocrv4_doc.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Dict, Final, List, Optional
  15. from pydantic import BaseModel
  16. from ..infra.models import DataInfo, PrimaryOperations
  17. from .shared import ocr
  18. __all__ = [
  19. "ANALYZE_IMAGES_ENDPOINT",
  20. "AnalyzeImagesRequest",
  21. "LayoutParsingResult",
  22. "AnalyzeImagesResult",
  23. "BUILD_VECTOR_STORE_ENDPOINT",
  24. "BuildVectorStoreRequest",
  25. "BuildVectorStoreResult",
  26. "INVOKE_MLLM_ENDPOINT",
  27. "InvokeMLLMRequest",
  28. "InvokeMLLMResult",
  29. "CHAT_ENDPOINT",
  30. "ChatRequest",
  31. "ChatResult",
  32. "PRIMARY_OPERATIONS",
  33. ]
  34. ANALYZE_IMAGES_ENDPOINT: Final[str] = "/chatocr-visual"
  35. class AnalyzeImagesRequest(ocr.BaseInferRequest):
  36. useDocOrientationClassify: Optional[bool] = None
  37. useDocUnwarping: Optional[bool] = None
  38. useGeneralOcr: Optional[bool] = None
  39. useSealRecognition: Optional[bool] = None
  40. useTableRecognition: Optional[bool] = None
  41. textDetLimitSideLen: Optional[int] = None
  42. textDetLimitType: Optional[str] = None
  43. textDetThresh: Optional[float] = None
  44. textDetBoxThresh: Optional[float] = None
  45. textDetUnclipRatio: Optional[float] = None
  46. textRecScoreThresh: Optional[float] = None
  47. sealDetLimitSideLen: Optional[int] = None
  48. sealDetLimitType: Optional[str] = None
  49. sealDetThresh: Optional[float] = None
  50. sealDetBoxThresh: Optional[float] = None
  51. sealDetUnclipRatio: Optional[float] = None
  52. sealRecScoreThresh: Optional[float] = None
  53. class LayoutParsingResult(BaseModel):
  54. prunedResult: dict
  55. outputImages: Optional[Dict[str, str]] = None
  56. inputImage: Optional[str] = None
  57. class AnalyzeImagesResult(BaseModel):
  58. layoutParsingResults: List[LayoutParsingResult]
  59. visualInfo: List[dict]
  60. dataInfo: DataInfo
  61. BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
  62. class BuildVectorStoreRequest(BaseModel):
  63. visualInfo: List[dict]
  64. minCharacters: Optional[int] = None
  65. blockSize: Optional[int] = None
  66. retrieverConfig: Optional[dict] = None
  67. class BuildVectorStoreResult(BaseModel):
  68. vectorInfo: dict
  69. INVOKE_MLLM_ENDPOINT: Final[str] = "/chatocr-mllm"
  70. class InvokeMLLMRequest(BaseModel):
  71. image: str
  72. keyList: List[str]
  73. mllmChatBotConfig: Optional[dict] = None
  74. class InvokeMLLMResult(BaseModel):
  75. mllmPredictInfo: dict
  76. CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
  77. class ChatRequest(BaseModel):
  78. keyList: List[str]
  79. visualInfo: List[dict]
  80. useVectorRetrieval: Optional[bool] = None
  81. vectorInfo: Optional[dict] = None
  82. minCharacters: Optional[int] = None
  83. textTaskDescription: Optional[str] = None
  84. textOutputFormat: Optional[str] = None
  85. textRulesStr: Optional[str] = None
  86. textFewShotDemoTextContent: Optional[str] = None
  87. textFewShotDemoKeyValueList: Optional[str] = None
  88. tableTaskDescription: Optional[str] = None
  89. tableOutputFormat: Optional[str] = None
  90. tableRulesStr: Optional[str] = None
  91. tableFewShotDemoTextContent: Optional[str] = None
  92. tableFewShotDemoKeyValueList: Optional[str] = None
  93. mllmPredictInfo: Optional[dict] = None
  94. mllmIntegrationStrategy: Optional[str] = None
  95. chatBotConfig: Optional[dict] = None
  96. retrieverConfig: Optional[dict] = None
  97. class ChatResult(BaseModel):
  98. chatResult: dict
  99. PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
  100. "analyzeImages": (
  101. ANALYZE_IMAGES_ENDPOINT,
  102. AnalyzeImagesRequest,
  103. AnalyzeImagesResult,
  104. ),
  105. "buildVectorStore": (
  106. BUILD_VECTOR_STORE_ENDPOINT,
  107. BuildVectorStoreRequest,
  108. BuildVectorStoreResult,
  109. ),
  110. "invokeMllm": (
  111. INVOKE_MLLM_ENDPOINT,
  112. InvokeMLLMRequest,
  113. InvokeMLLMResult,
  114. ),
  115. "chat": (CHAT_ENDPOINT, ChatRequest, ChatResult),
  116. }