generate_kie_prompt.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from .base import BaseGeneratePrompt
  15. class GenerateKIEPrompt(BaseGeneratePrompt):
  16. """Generate KIE Prompt"""
  17. entities = ["text_kie_prompt", "table_kie_prompt"]
  18. def __init__(self, config):
  19. super().__init__()
  20. task_type = config.get("task_type", "")
  21. task_description = config.get("task_description", "")
  22. output_format = config.get("output_format", "")
  23. rules_str = config.get("rules_str", "")
  24. few_shot_demo_text_content = config.get("few_shot_demo_text_content", "")
  25. few_shot_demo_key_value_list = config.get("few_shot_demo_key_value_list", "")
  26. if task_description is None:
  27. task_description = ""
  28. if output_format is None:
  29. output_format = ""
  30. if rules_str is None:
  31. rules_str = ""
  32. if few_shot_demo_text_content is None:
  33. few_shot_demo_text_content = ""
  34. if few_shot_demo_key_value_list is None:
  35. few_shot_demo_key_value_list = ""
  36. if task_type not in self.entities:
  37. raise ValueError(
  38. f"task type must be in {self.entities} of GenerateKIEPrompt."
  39. )
  40. self.task_type = task_type
  41. self.task_description = task_description
  42. self.output_format = output_format
  43. self.rules_str = rules_str
  44. self.few_shot_demo_text_content = few_shot_demo_text_content
  45. self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
  46. def generate_prompt(
  47. self,
  48. text_content,
  49. key_list,
  50. task_description=None,
  51. output_format=None,
  52. rules_str=None,
  53. few_shot_demo_text_content=None,
  54. few_shot_demo_key_value_list=None,
  55. ):
  56. """
  57. args:
  58. return:
  59. """
  60. if task_description is None:
  61. task_description = self.task_description
  62. if output_format is None:
  63. output_format = self.output_format
  64. if rules_str is None:
  65. rules_str = self.rules_str
  66. if few_shot_demo_text_content is None:
  67. few_shot_demo_text_content = self.few_shot_demo_text_content
  68. if few_shot_demo_key_value_list is None:
  69. few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
  70. prompt = f"""{task_description}{output_format}{rules_str}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
  71. if self.task_type == "table_kie_prompt":
  72. prompt += f"""\n结合上面,下面正式开始:\
  73. 表格内容:```{text_content}```\
  74. 关键词列表:{key_list}。""".replace(
  75. " ", ""
  76. )
  77. elif self.task_type == "text_kie_prompt":
  78. prompt += f"""\n结合上面的例子,下面正式开始:\
  79. OCR文字:```{text_content}```\
  80. 关键词列表:{key_list}。""".replace(
  81. " ", ""
  82. )
  83. else:
  84. raise ValueError(f"{self.task_type} is currently not supported.")
  85. return prompt