pp_doctranslation.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Any, Dict, List
  15. from .....utils.deps import function_requires_deps, is_dep_available
  16. from ...infra import utils as serving_utils
  17. from ...infra.config import AppConfig
  18. from ...infra.models import AIStudioResultResponse
  19. from ...schemas import pp_doctranslation as schema
  20. from .._app import create_app, primary_operation
  21. from ._common import common
  22. from ._common import ocr as ocr_common
  23. if is_dep_available("fastapi"):
  24. from fastapi import FastAPI
  25. @function_requires_deps("fastapi")
  26. def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
  27. pipeline.inintial_visual_predictor(pipeline.config)
  28. app, ctx = create_app(
  29. pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
  30. )
  31. ocr_common.update_app_context(ctx)
  32. @primary_operation(
  33. app,
  34. schema.ANALYZE_IMAGES_ENDPOINT,
  35. "analyzeImages",
  36. )
  37. async def _analyze_images(
  38. request: schema.AnalyzeImagesRequest,
  39. ) -> AIStudioResultResponse[schema.AnalyzeImagesResult]:
  40. pipeline = ctx.pipeline
  41. log_id = serving_utils.generate_log_id()
  42. visualize_enabled = (
  43. request.visualize if request.visualize is not None else ctx.config.visualize
  44. )
  45. images, data_info = await ocr_common.get_images(request, ctx)
  46. result = await pipeline.call(
  47. pipeline.pipeline.visual_predict,
  48. images,
  49. use_doc_orientation_classify=request.useDocOrientationClassify,
  50. use_doc_unwarping=request.useDocUnwarping,
  51. use_textline_orientation=request.useTextlineOrientation,
  52. use_seal_recognition=request.useSealRecognition,
  53. use_table_recognition=request.useTableRecognition,
  54. use_formula_recognition=request.useFormulaRecognition,
  55. use_chart_recognition=request.useChartRecognition,
  56. use_region_detection=request.useRegionDetection,
  57. layout_threshold=request.layoutThreshold,
  58. layout_nms=request.layoutNms,
  59. layout_unclip_ratio=request.layoutUnclipRatio,
  60. layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
  61. text_det_limit_side_len=request.textDetLimitSideLen,
  62. text_det_limit_type=request.textDetLimitType,
  63. text_det_thresh=request.textDetThresh,
  64. text_det_box_thresh=request.textDetBoxThresh,
  65. text_det_unclip_ratio=request.textDetUnclipRatio,
  66. text_rec_score_thresh=request.textRecScoreThresh,
  67. seal_det_limit_side_len=request.sealDetLimitSideLen,
  68. seal_det_limit_type=request.sealDetLimitType,
  69. seal_det_thresh=request.sealDetThresh,
  70. seal_det_box_thresh=request.sealDetBoxThresh,
  71. seal_det_unclip_ratio=request.sealDetUnclipRatio,
  72. seal_rec_score_thresh=request.sealRecScoreThresh,
  73. use_wired_table_cells_trans_to_html=request.useWiredTableCellsTransToHtml,
  74. use_wireless_table_cells_trans_to_html=request.useWirelessTableCellsTransToHtml,
  75. use_table_orientation_classify=request.useTableOrientationClassify,
  76. use_ocr_results_with_table_cells=request.useOcrResultsWithTableCells,
  77. use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
  78. use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
  79. )
  80. layout_parsing_results: List[Dict[str, Any]] = []
  81. for i, (img, item) in enumerate(zip(images, result)):
  82. pruned_res = common.prune_result(item["layout_parsing_result"].json["res"])
  83. md_data = item["layout_parsing_result"].markdown
  84. md_text = md_data["markdown_texts"]
  85. md_imgs = await serving_utils.call_async(
  86. common.postprocess_images,
  87. md_data["markdown_images"],
  88. log_id,
  89. filename_template=f"markdown_{i}/{{key}}",
  90. file_storage=ctx.extra["file_storage"],
  91. return_urls=ctx.extra["return_img_urls"],
  92. max_img_size=ctx.extra["max_output_img_size"],
  93. )
  94. md_flags = md_data["page_continuation_flags"]
  95. if visualize_enabled:
  96. imgs = {
  97. "input_img": img,
  98. **item["layout_parsing_result"].img,
  99. }
  100. imgs = await serving_utils.call_async(
  101. common.postprocess_images,
  102. imgs,
  103. log_id,
  104. filename_template=f"{{key}}_{i}.jpg",
  105. file_storage=ctx.extra["file_storage"],
  106. return_urls=ctx.extra["return_img_urls"],
  107. max_img_size=ctx.extra["max_output_img_size"],
  108. )
  109. else:
  110. imgs = {}
  111. layout_parsing_results.append(
  112. dict(
  113. prunedResult=pruned_res,
  114. markdown=dict(
  115. text=md_text,
  116. images=md_imgs,
  117. isStart=md_flags[0],
  118. isEnd=md_flags[1],
  119. ),
  120. outputImages=(
  121. {k: v for k, v in imgs.items() if k != "input_img"}
  122. if imgs
  123. else None
  124. ),
  125. inputImage=imgs.get("input_img"),
  126. )
  127. )
  128. return AIStudioResultResponse[schema.AnalyzeImagesResult](
  129. logId=log_id,
  130. result=schema.AnalyzeImagesResult(
  131. layoutParsingResults=layout_parsing_results,
  132. dataInfo=data_info,
  133. ),
  134. )
  135. @primary_operation(
  136. app,
  137. schema.TRANSLATE_ENDPOINT,
  138. "translate",
  139. )
  140. async def _translate(
  141. request: schema.TranslateRequest,
  142. ) -> AIStudioResultResponse[schema.TranslateResult]:
  143. pipeline = ctx.pipeline
  144. ori_md_info_list: List[Dict[str, Any]] = []
  145. for i, item in enumerate(request.markdownList):
  146. ori_md_info_list.append(
  147. {
  148. "input_path": None,
  149. "page_index": i,
  150. "markdown_texts": item.text,
  151. "page_continuation_flags": (item.isStart, item.isEnd),
  152. }
  153. )
  154. result = await serving_utils.call_async(
  155. pipeline.pipeline.translate,
  156. ori_md_info_list,
  157. target_language=request.targetLanguage,
  158. chunk_size=request.chunkSize,
  159. task_description=request.taskDescription,
  160. output_format=request.outputFormat,
  161. rules_str=request.rulesStr,
  162. few_shot_demo_text_content=request.fewShotDemoTextContent,
  163. few_shot_demo_key_value_list=request.fewShotDemoKeyValueList,
  164. chat_bot_config=request.chatBotConfig,
  165. llm_request_interval=request.llmRequestInterval,
  166. )
  167. translation_results: List[Dict[str, Any]] = []
  168. for item in result:
  169. translation_results.append(
  170. dict(
  171. language=item["language"],
  172. markdown=dict(
  173. text=item["markdown_texts"],
  174. isStart=item["page_continuation_flags"][0],
  175. isEnd=item["page_continuation_flags"][1],
  176. ),
  177. )
  178. )
  179. return AIStudioResultResponse[schema.TranslateResult](
  180. logId=serving_utils.generate_log_id(),
  181. result=schema.TranslateResult(
  182. translationResults=translation_results,
  183. ),
  184. )
  185. return app