generate_kie_prompt.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Dict
  15. from .base import BaseGeneratePrompt
  16. from .....utils import logging
  17. class GenerateKIEPrompt(BaseGeneratePrompt):
  18. """Generate KIE Prompt"""
  19. entities = [
  20. "text_kie_prompt_v1",
  21. "table_kie_prompt_v1",
  22. "text_kie_prompt_v2",
  23. "table_kie_prompt_v2",
  24. ]
  25. def __init__(self, config: Dict) -> None:
  26. """Initializes the GenerateKIEPrompt instance with the given configuration.
  27. Args:
  28. config (Dict): A dictionary containing configuration settings.
  29. - task_type (str): The type of task to generate a prompt for, in the support entities list.
  30. - task_description (str, optional): A description of the task. Defaults to an empty string.
  31. - output_format (str, optional): The desired output format. Defaults to an empty string.
  32. - rules_str (str, optional): A string representing rules for the task. Defaults to an empty string.
  33. - few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to an empty string.
  34. - few_shot_demo_key_value_list (str, optional): A key-value list for few-shot demos. Defaults to an empty string.
  35. Raises:
  36. ValueError: If the task type is not in the allowed entities for GenerateKIEPrompt.
  37. """
  38. super().__init__()
  39. task_type = config.get("task_type", "")
  40. task_description = config.get("task_description", "")
  41. output_format = config.get("output_format", "")
  42. rules_str = config.get("rules_str", "")
  43. few_shot_demo_text_content = config.get("few_shot_demo_text_content", "")
  44. few_shot_demo_key_value_list = config.get("few_shot_demo_key_value_list", "")
  45. if task_description is None:
  46. task_description = ""
  47. if output_format is None:
  48. output_format = ""
  49. if rules_str is None:
  50. rules_str = ""
  51. if few_shot_demo_text_content is None:
  52. few_shot_demo_text_content = ""
  53. if few_shot_demo_key_value_list is None:
  54. few_shot_demo_key_value_list = ""
  55. if task_type not in self.entities:
  56. raise ValueError(
  57. f"task type must be in {self.entities} of GenerateKIEPrompt."
  58. )
  59. self.task_type = task_type
  60. self.task_description = task_description
  61. self.output_format = output_format
  62. self.rules_str = rules_str
  63. self.few_shot_demo_text_content = few_shot_demo_text_content
  64. self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
  65. def generate_prompt(
  66. self,
  67. text_content: str,
  68. key_list: list,
  69. task_description: str = None,
  70. output_format: str = None,
  71. rules_str: str = None,
  72. few_shot_demo_text_content: str = None,
  73. few_shot_demo_key_value_list: str = None,
  74. ) -> str:
  75. """Generates a prompt based on the given parameters.
  76. Args:
  77. text_content (str): The main text content to be used in the prompt.
  78. key_list (list): A list of keywords for information extraction.
  79. task_description (str, optional): A description of the task. Defaults to None.
  80. output_format (str, optional): The desired output format. Defaults to None.
  81. rules_str (str, optional): A string containing rules or instructions. Defaults to None.
  82. few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to None.
  83. few_shot_demo_key_value_list (str, optional): Key-value list for few-shot demos. Defaults to None.
  84. Returns:
  85. str: The generated prompt.
  86. Raises:
  87. ValueError: If the task_type is not supported.
  88. """
  89. if task_description is None:
  90. task_description = self.task_description
  91. if output_format is None:
  92. output_format = self.output_format
  93. if rules_str is None:
  94. rules_str = self.rules_str
  95. if few_shot_demo_text_content is None:
  96. few_shot_demo_text_content = self.few_shot_demo_text_content
  97. if few_shot_demo_key_value_list is None:
  98. few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
  99. prompt = f"""{task_description}{rules_str}{output_format}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
  100. task_type = self.task_type
  101. if task_type == "table_kie_prompt_v1":
  102. prompt += f"""\n结合上面,下面正式开始:\
  103. 表格内容:```{text_content}```\
  104. 关键词列表:[{key_list}]。""".replace(
  105. " ", ""
  106. )
  107. elif task_type == "text_kie_prompt_v1":
  108. prompt += f"""\n结合上面的例子,下面正式开始:\
  109. OCR文字:```{text_content}```\
  110. 关键词列表:[{key_list}]。""".replace(
  111. " ", ""
  112. )
  113. elif task_type == "table_kie_prompt_v2":
  114. prompt += f"""\n结合上面,下面正式开始:\
  115. 表格内容:```{text_content}```\
  116. \n问题列表:{key_list}。""".replace(
  117. " ", ""
  118. )
  119. elif task_type == "text_kie_prompt_v2":
  120. prompt += f"""\n结合上面的例子,下面正式开始:\
  121. OCR文字:```{text_content}```\
  122. \n问题列表:{key_list}。""".replace(
  123. " ", ""
  124. )
  125. return prompt