| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from typing import Dict, Final, List, Optional, Tuple, Union
- from pydantic import BaseModel
- from ..infra.models import DataInfo, PrimaryOperations
- from .shared import ocr
# Names exported by `from <this module> import *`, grouped by operation in
# the same order in which they are defined in this module.
__all__ = [
    # analyzeImages
    "ANALYZE_IMAGES_ENDPOINT",
    "AnalyzeImagesRequest",
    "LayoutParsingResult",
    "AnalyzeImagesResult",
    # buildVectorStore
    "BUILD_VECTOR_STORE_ENDPOINT",
    "BuildVectorStoreRequest",
    "BuildVectorStoreResult",
    # invokeMllm
    "INVOKE_MLLM_ENDPOINT",
    "InvokeMLLMRequest",
    "InvokeMLLMResult",
    # chat
    "CHAT_ENDPOINT",
    "ChatRequest",
    "ChatResult",
    # Registry tying the endpoints and models together
    "PRIMARY_OPERATIONS",
]
# Endpoint path string registered for the "analyzeImages" operation in
# PRIMARY_OPERATIONS.
ANALYZE_IMAGES_ENDPOINT: Final[str] = "/chatocr-visual"
class AnalyzeImagesRequest(ocr.BaseInferRequest):
    # Request model paired with ANALYZE_IMAGES_ENDPOINT in PRIMARY_OPERATIONS.
    # Adds the fields below to whatever ocr.BaseInferRequest already declares.
    # Every field declared here is optional and defaults to None.
    # NOTE(review): None presumably means "fall back to the server-side
    # configuration" -- confirm against the request handler.

    # --- Feature switches (use* fields) ---
    useDocOrientationClassify: Optional[bool] = None
    useDocUnwarping: Optional[bool] = None
    useTextlineOrientation: Optional[bool] = None
    useSealRecognition: Optional[bool] = None
    useTableRecognition: Optional[bool] = None

    # --- Layout settings (layout* fields) ---
    # Several of these accept either a scalar/tuple or a dict.
    # NOTE(review): the dict form is presumably a per-class mapping -- confirm.
    layoutThreshold: Optional[Union[float, dict]] = None
    layoutNms: Optional[bool] = None
    layoutUnclipRatio: Optional[Union[float, Tuple[float, float], dict]] = None
    layoutMergeBboxesMode: Optional[Union[str, dict]] = None

    # --- Text detection / recognition settings (textDet*, textRec*) ---
    textDetLimitSideLen: Optional[int] = None
    textDetLimitType: Optional[str] = None
    textDetThresh: Optional[float] = None
    textDetBoxThresh: Optional[float] = None
    textDetUnclipRatio: Optional[float] = None
    textRecScoreThresh: Optional[float] = None

    # --- Seal detection / recognition settings (sealDet*, sealRec*) ---
    # Mirrors the text* group field-for-field.
    sealDetLimitSideLen: Optional[int] = None
    sealDetLimitType: Optional[str] = None
    sealDetThresh: Optional[float] = None
    sealDetBoxThresh: Optional[float] = None
    sealDetUnclipRatio: Optional[float] = None
    sealRecScoreThresh: Optional[float] = None

    # --- Output control ---
    # NOTE(review): presumably controls whether result images are returned
    # -- confirm against the handler.
    visualize: Optional[bool] = None
class LayoutParsingResult(BaseModel):
    # Element type of AnalyzeImagesResult.layoutParsingResults.
    # Only prunedResult is required; both image fields default to None.

    # Free-form dict; its schema is not constrained by this model.
    prunedResult: dict

    # NOTE(review): presumably image name -> encoded image string -- confirm.
    outputImages: Optional[Dict[str, str]] = None

    # NOTE(review): presumably the encoded input image -- confirm.
    inputImage: Optional[str] = None
class AnalyzeImagesResult(BaseModel):
    # Result model paired with ANALYZE_IMAGES_ENDPOINT in PRIMARY_OPERATIONS.
    # All three fields are required.

    layoutParsingResults: List[LayoutParsingResult]

    # Same type as the visualInfo field accepted by BuildVectorStoreRequest
    # and ChatRequest.
    # NOTE(review): presumably meant to be passed on to those requests
    # unchanged -- confirm.
    visualInfo: List[dict]

    # Project-defined model imported from ..infra.models.
    dataInfo: DataInfo
# Endpoint path string registered for the "buildVectorStore" operation in
# PRIMARY_OPERATIONS.
BUILD_VECTOR_STORE_ENDPOINT: Final[str] = "/chatocr-vector"
class BuildVectorStoreRequest(BaseModel):
    # Request model paired with BUILD_VECTOR_STORE_ENDPOINT in
    # PRIMARY_OPERATIONS. Only visualInfo is required.

    # Same type as AnalyzeImagesResult.visualInfo.
    visualInfo: List[dict]

    # Defaults to 3500, the same default as ChatRequest.minCharacters.
    # NOTE(review): presumably a text-length threshold -- confirm semantics.
    minCharacters: int = 3500

    # Defaults to 300.
    # NOTE(review): presumably the chunk size used when splitting text
    # -- confirm.
    blockSize: int = 300

    # Free-form retriever settings; None when not supplied.
    retrieverConfig: Optional[dict] = None
class BuildVectorStoreResult(BaseModel):
    # Result model paired with BUILD_VECTOR_STORE_ENDPOINT in
    # PRIMARY_OPERATIONS.

    # Required free-form dict; ChatRequest has an optional field with the
    # same name and dict type.
    vectorInfo: dict
# Endpoint path string registered for the "invokeMllm" operation in
# PRIMARY_OPERATIONS.
INVOKE_MLLM_ENDPOINT: Final[str] = "/chatocr-mllm"
class InvokeMLLMRequest(BaseModel):
    # Request model paired with INVOKE_MLLM_ENDPOINT in PRIMARY_OPERATIONS.
    # image and keyList are required.

    # NOTE(review): presumably a URL or an encoded image payload -- confirm
    # which forms the handler accepts.
    image: str

    # Same type as ChatRequest.keyList.
    keyList: List[str]

    # Free-form chat-bot settings; None when not supplied.
    mllmChatBotConfig: Optional[dict] = None
class InvokeMLLMResult(BaseModel):
    # Result model paired with INVOKE_MLLM_ENDPOINT in PRIMARY_OPERATIONS.

    # Required free-form dict; ChatRequest has an optional field with the
    # same name and dict type.
    mllmPredictInfo: dict
# Endpoint path string registered for the "chat" operation in
# PRIMARY_OPERATIONS.
CHAT_ENDPOINT: Final[str] = "/chatocr-chat"
class ChatRequest(BaseModel):
    # Request model paired with CHAT_ENDPOINT in PRIMARY_OPERATIONS.
    # Only keyList and visualInfo are required; every other field has a
    # default.

    # --- Required inputs ---
    keyList: List[str]
    # Same type as AnalyzeImagesResult.visualInfo.
    visualInfo: List[dict]

    # --- Vector retrieval ---
    # Retrieval is enabled by default.
    # NOTE(review): vectorInfo presumably carries
    # BuildVectorStoreResult.vectorInfo -- confirm.
    useVectorRetrieval: bool = True
    vectorInfo: Optional[dict] = None
    # Same default (3500) as BuildVectorStoreRequest.minCharacters.
    minCharacters: int = 3500

    # --- Prompt overrides for text content (text* fields) ---
    textTaskDescription: Optional[str] = None
    textOutputFormat: Optional[str] = None
    textRulesStr: Optional[str] = None
    textFewShotDemoTextContent: Optional[str] = None
    textFewShotDemoKeyValueList: Optional[str] = None

    # --- Prompt overrides for table content (table* fields) ---
    # Mirrors the text* group field-for-field.
    tableTaskDescription: Optional[str] = None
    tableOutputFormat: Optional[str] = None
    tableRulesStr: Optional[str] = None
    tableFewShotDemoTextContent: Optional[str] = None
    tableFewShotDemoKeyValueList: Optional[str] = None

    # --- Multimodal LLM integration ---
    # NOTE(review): mllmPredictInfo presumably carries
    # InvokeMLLMResult.mllmPredictInfo -- confirm.
    mllmPredictInfo: Optional[dict] = None
    # Plain str with default "integration"; the set of accepted values is
    # not constrained by this model.
    mllmIntegrationStrategy: str = "integration"

    # --- Free-form backend settings; None when not supplied ---
    chatBotConfig: Optional[dict] = None
    retrieverConfig: Optional[dict] = None
class ChatResult(BaseModel):
    # Result model paired with CHAT_ENDPOINT in PRIMARY_OPERATIONS.

    # Required free-form dict; its schema is not constrained by this model.
    # NOTE(review): presumably keyed by the entries of ChatRequest.keyList
    # -- confirm.
    chatResult: dict
# Registry of the operations declared in this module. Each key is an
# operation name and each value is an
# (endpoint path, request model, result model) triple.
# NOTE: the third key is spelled "invokeMllm", unlike the "MLLM" casing used
# in the class names; the key strings are runtime data and must stay as-is.
PRIMARY_OPERATIONS: Final[PrimaryOperations] = {
    "analyzeImages": (
        ANALYZE_IMAGES_ENDPOINT,
        AnalyzeImagesRequest,
        AnalyzeImagesResult,
    ),
    "buildVectorStore": (
        BUILD_VECTOR_STORE_ENDPOINT,
        BuildVectorStoreRequest,
        BuildVectorStoreResult,
    ),
    "invokeMllm": (
        INVOKE_MLLM_ENDPOINT,
        InvokeMLLMRequest,
        InvokeMLLMResult,
    ),
    "chat": (
        CHAT_ENDPOINT,
        ChatRequest,
        ChatResult,
    ),
}
|