ocr.py 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Any, Dict, List
  15. from .....utils.deps import function_requires_deps, is_dep_available
  16. from ...infra import utils as serving_utils
  17. from ...infra.config import AppConfig
  18. from ...infra.models import ResultResponse
  19. from ...schemas.ocr import INFER_ENDPOINT, InferRequest, InferResult
  20. from .._app import create_app, primary_operation
  21. from ._common import common
  22. from ._common import ocr as ocr_common
  23. if is_dep_available("fastapi"):
  24. from fastapi import FastAPI
  25. @function_requires_deps("fastapi")
  26. def create_pipeline_app(pipeline: Any, app_config: AppConfig) -> "FastAPI":
  27. app, ctx = create_app(
  28. pipeline=pipeline, app_config=app_config, app_aiohttp_session=True
  29. )
  30. ocr_common.update_app_context(ctx)
  31. @primary_operation(
  32. app,
  33. INFER_ENDPOINT,
  34. "infer",
  35. )
  36. async def _infer(request: InferRequest) -> ResultResponse[InferResult]:
  37. pipeline = ctx.pipeline
  38. log_id = serving_utils.generate_log_id()
  39. images, data_info = await ocr_common.get_images(request, ctx)
  40. result = await pipeline.infer(
  41. images,
  42. use_doc_orientation_classify=request.useDocOrientationClassify,
  43. use_doc_unwarping=request.useDocUnwarping,
  44. use_textline_orientation=request.useTextlineOrientation,
  45. text_det_limit_side_len=request.textDetLimitSideLen,
  46. text_det_limit_type=request.textDetLimitType,
  47. text_det_thresh=request.textDetThresh,
  48. text_det_box_thresh=request.textDetBoxThresh,
  49. text_det_unclip_ratio=request.textDetUnclipRatio,
  50. text_rec_score_thresh=request.textRecScoreThresh,
  51. )
  52. ocr_results: List[Dict[str, Any]] = []
  53. for i, (img, item) in enumerate(zip(images, result)):
  54. pruned_res = common.prune_result(item.json["res"])
  55. if ctx.config.visualize:
  56. output_imgs = item.img
  57. imgs = {
  58. "input_img": img,
  59. "ocr_img": output_imgs["ocr_res_img"],
  60. }
  61. if "preprocessed_img" in output_imgs:
  62. imgs["doc_preprocessing_img"] = output_imgs["preprocessed_img"]
  63. imgs = await serving_utils.call_async(
  64. common.postprocess_images,
  65. imgs,
  66. log_id,
  67. filename_template=f"{{key}}_{i}.jpg",
  68. file_storage=ctx.extra["file_storage"],
  69. return_urls=ctx.extra["return_img_urls"],
  70. max_img_size=ctx.extra["max_output_img_size"],
  71. )
  72. else:
  73. imgs = {}
  74. ocr_results.append(
  75. dict(
  76. prunedResult=pruned_res,
  77. ocrImage=imgs.get("ocr_img"),
  78. docPreprocessingImage=imgs.get("doc_preprocessing_img"),
  79. inputImage=imgs.get("input_img"),
  80. )
  81. )
  82. return ResultResponse[InferResult](
  83. logId=log_id,
  84. result=InferResult(
  85. ocrResults=ocr_results,
  86. dataInfo=data_info,
  87. ),
  88. )
  89. return app