pp_structurev3.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Any, Dict, List
  15. from fastapi import FastAPI
  16. from ...infra import utils as serving_utils
  17. from ...infra.config import AppConfig
  18. from ...infra.models import ResultResponse
  19. from ...schemas.pp_structurev3 import INFER_ENDPOINT, InferRequest, InferResult
  20. from .._app import create_app, primary_operation
  21. from ._common import common
  22. from ._common import ocr as ocr_common
  23. def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> FastAPI:
  24. app, ctx = create_app(
  25. pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
  26. )
  27. ocr_common.update_app_context(ctx)
  28. @primary_operation(
  29. app,
  30. INFER_ENDPOINT,
  31. "infer",
  32. )
  33. async def _infer(
  34. request: InferRequest,
  35. ) -> ResultResponse[InferResult]:
  36. pipeline = ctx.pipeline
  37. log_id = serving_utils.generate_log_id()
  38. images, data_info = await ocr_common.get_images(request, ctx)
  39. result = await pipeline.infer(
  40. images,
  41. use_doc_orientation_classify=request.useDocOrientationClassify,
  42. use_doc_unwarping=request.useDocUnwarping,
  43. use_textline_orientation=request.useTextlineOrientation,
  44. use_general_ocr=request.useGeneralOcr,
  45. use_seal_recognition=request.useSealRecognition,
  46. use_table_recognition=request.useTableRecognition,
  47. use_formula_recognition=request.useFormulaRecognition,
  48. layout_threshold=request.layoutThreshold,
  49. layout_nms=request.layoutNms,
  50. layout_unclip_ratio=request.layoutUnclipRatio,
  51. layout_merge_bboxes_mode=request.layoutMergeBboxesMode,
  52. text_det_limit_side_len=request.textDetLimitSideLen,
  53. text_det_limit_type=request.textDetLimitType,
  54. text_det_thresh=request.textDetThresh,
  55. text_det_box_thresh=request.textDetBoxThresh,
  56. text_det_unclip_ratio=request.textDetUnclipRatio,
  57. text_rec_score_thresh=request.textRecScoreThresh,
  58. seal_det_limit_side_len=request.sealDetLimitSideLen,
  59. seal_det_limit_type=request.sealDetLimitType,
  60. seal_det_thresh=request.sealDetThresh,
  61. seal_det_box_thresh=request.sealDetBoxThresh,
  62. seal_det_unclip_ratio=request.sealDetUnclipRatio,
  63. seal_rec_score_thresh=request.sealRecScoreThresh,
  64. use_table_cells_ocr_results=request.useTableCellsOcrResults,
  65. use_e2e_wired_table_rec_model=request.useE2eWiredTableRecModel,
  66. use_e2e_wireless_table_rec_model=request.useE2eWirelessTableRecModel,
  67. )
  68. layout_parsing_results: List[Dict[str, Any]] = []
  69. for i, (img, item) in enumerate(zip(images, result)):
  70. pruned_res = common.prune_result(item.json["res"])
  71. md_data = item.markdown
  72. md_text = md_data["markdown_texts"]
  73. md_imgs = await serving_utils.call_async(
  74. common.postprocess_images,
  75. md_data["markdown_images"],
  76. log_id,
  77. filename_template=f"markdown_{i}/{{key}}",
  78. file_storage=ctx.extra["file_storage"],
  79. return_urls=ctx.extra["return_img_urls"],
  80. max_img_size=ctx.extra["max_output_img_size"],
  81. )
  82. md_flags = md_data["page_continuation_flags"]
  83. if ctx.config.visualize:
  84. imgs = {
  85. "input_img": img,
  86. **item.img,
  87. }
  88. imgs = await serving_utils.call_async(
  89. common.postprocess_images,
  90. imgs,
  91. log_id,
  92. filename_template=f"{{key}}_{i}.jpg",
  93. file_storage=ctx.extra["file_storage"],
  94. return_urls=ctx.extra["return_img_urls"],
  95. max_img_size=ctx.extra["max_output_img_size"],
  96. )
  97. else:
  98. imgs = {}
  99. layout_parsing_results.append(
  100. dict(
  101. prunedResult=pruned_res,
  102. markdown=dict(
  103. text=md_text,
  104. images=md_imgs,
  105. isStart=md_flags[0],
  106. isEnd=md_flags[1],
  107. ),
  108. outputImages=(
  109. {k: v for k, v in imgs.items() if k != "input_img"}
  110. if imgs
  111. else None
  112. ),
  113. inputImage=imgs.get("input_img"),
  114. )
  115. )
  116. return ResultResponse[InferResult](
  117. logId=log_id,
  118. result=InferResult(
  119. layoutParsingResults=layout_parsing_results,
  120. dataInfo=data_info,
  121. ),
  122. )
  123. return app