pp_chatocrv4_doc.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, Final, List, Optional, Tuple, Union

from pydantic import BaseModel

from ..infra.models import DataInfo, PrimaryOperations
from .shared import ocr
  18. __all__ = [
  19. "ANALYZE_IMAGES_ENDPOINT",
  20. "AnalyzeImagesRequest",
  21. "LayoutParsingResult",
  22. "AnalyzeImagesResult",
  23. "BUILD_VECTOR_STORE_ENDPOINT",
  24. "BuildVectorStoreRequest",
  25. "BuildVectorStoreResult",
  26. "INVOKE_MLLM_ENDPOINT",
  27. "InvokeMLLMRequest",
  28. "InvokeMLLMResult",
  29. "CHAT_ENDPOINT",
  30. "ChatRequest",
  31. "ChatResult",
  32. "PRIMARY_OPERATIONS",
  33. ]
  34. ANALYZE_IMAGES_ENDPOINT: Final[str] = "/chatocr-visual"
  35. class AnalyzeImagesRequest(ocr.BaseInferRequest):
  36. useDocOrientationClassify: Optional[bool] = None
  37. useDocUnwarping: Optional[bool] = None
  38. useTextlineOrientation: Optional[bool] = None
  39. useSealRecognition: Optional[bool] = None
  40. useTableRecognition: Optional[bool] = None
  41. layoutThreshold: Optional[float] = None
  42. layoutNms: Optional[bool] = None
  43. layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
  44. layoutMergeBboxesMode: Optional[Union[str, dict]] = None
  45. textDetLimitSideLen: Optional[int] = None
  46. textDetLimitType: Optional[str] = None
  47. textDetThresh: Optional[float] = None
  48. textDetBoxThresh: Optional[float] = None
  49. textDetUnclipRatio: Optional[float] = None
  50. textRecScoreThresh: Optional[float] = None
  51. sealDetLimitSideLen: Optional[int] = None
  52. sealDetLimitType: Optional[str] = None
  53. sealDetThresh: Optional[float] = None
  54. sealDetBoxThresh: Optional[float] = None
  55. sealDetUnclipRatio: Optional[float] = None
  56. sealRecScoreThresh: Optional[float] = None
  57. class LayoutParsingResult(BaseModel):
  58. prunedResult: dict
  59. outputImages: Optional[Dict[str, str]] = None
  60. inputImage: Optional[str] = None
  61. class AnalyzeImagesResult(BaseModel):
  62. layoutParsingResults: List[LayoutParsingResult]
  63. visualInfo: List[dict]
  64. dataInfo: DataInfo
  65. BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
  66. class BuildVectorStoreRequest(BaseModel):
  67. visualInfo: List[dict]
  68. minCharacters: Optional[int] = None
  69. blockSize: Optional[int] = None
  70. retrieverConfig: Optional[dict] = None
  71. class BuildVectorStoreResult(BaseModel):
  72. vectorInfo: dict
  73. INVOKE_MLLM_ENDPOINT: Final[str] = "/chatocr-mllm"
  74. class InvokeMLLMRequest(BaseModel):
  75. image: str
  76. keyList: List[str]
  77. mllmChatBotConfig: Optional[dict] = None
  78. class InvokeMLLMResult(BaseModel):
  79. mllmPredictInfo: dict
  80. CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
  81. class ChatRequest(BaseModel):
  82. keyList: List[str]
  83. visualInfo: List[dict]
  84. useVectorRetrieval: Optional[bool] = None
  85. vectorInfo: Optional[dict] = None
  86. minCharacters: Optional[int] = None
  87. textTaskDescription: Optional[str] = None
  88. textOutputFormat: Optional[str] = None
  89. textRulesStr: Optional[str] = None
  90. textFewShotDemoTextContent: Optional[str] = None
  91. textFewShotDemoKeyValueList: Optional[str] = None
  92. tableTaskDescription: Optional[str] = None
  93. tableOutputFormat: Optional[str] = None
  94. tableRulesStr: Optional[str] = None
  95. tableFewShotDemoTextContent: Optional[str] = None
  96. tableFewShotDemoKeyValueList: Optional[str] = None
  97. mllmPredictInfo: Optional[dict] = None
  98. mllmIntegrationStrategy: Optional[str] = None
  99. chatBotConfig: Optional[dict] = None
  100. retrieverConfig: Optional[dict] = None
  101. class ChatResult(BaseModel):
  102. chatResult: dict
  103. PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
  104. "analyzeImages": (
  105. ANALYZE_IMAGES_ENDPOINT,
  106. AnalyzeImagesRequest,
  107. AnalyzeImagesResult,
  108. ),
  109. "buildVectorStore": (
  110. BUILD_VECTOR_STORE_ENDPOINT,
  111. BuildVectorStoreRequest,
  112. BuildVectorStoreResult,
  113. ),
  114. "invokeMllm": (
  115. INVOKE_MLLM_ENDPOINT,
  116. InvokeMLLMRequest,
  117. InvokeMLLMResult,
  118. ),
  119. "chat": (CHAT_ENDPOINT, ChatRequest, ChatResult),
  120. }