generate_kie_prompt.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from typing import Dict
  15. from .base import BaseGeneratePrompt
  16. class GenerateKIEPrompt(BaseGeneratePrompt):
  17. """Generate KIE Prompt"""
  18. entities = [
  19. "text_kie_prompt_v1",
  20. "table_kie_prompt_v1",
  21. "text_kie_prompt_v2",
  22. "table_kie_prompt_v2",
  23. ]
  24. def __init__(self, config: Dict) -> None:
  25. """Initializes the GenerateKIEPrompt instance with the given configuration.
  26. Args:
  27. config (Dict): A dictionary containing configuration settings.
  28. - task_type (str): The type of task to generate a prompt for, in the support entities list.
  29. - task_description (str, optional): A description of the task. Defaults to an empty string.
  30. - output_format (str, optional): The desired output format. Defaults to an empty string.
  31. - rules_str (str, optional): A string representing rules for the task. Defaults to an empty string.
  32. - few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to an empty string.
  33. - few_shot_demo_key_value_list (str, optional): A key-value list for few-shot demos. Defaults to an empty string.
  34. Raises:
  35. ValueError: If the task type is not in the allowed entities for GenerateKIEPrompt.
  36. """
  37. super().__init__()
  38. task_type = config.get("task_type", "")
  39. task_description = config.get("task_description", "")
  40. output_format = config.get("output_format", "")
  41. rules_str = config.get("rules_str", "")
  42. few_shot_demo_text_content = config.get("few_shot_demo_text_content", "")
  43. few_shot_demo_key_value_list = config.get("few_shot_demo_key_value_list", "")
  44. if task_description is None:
  45. task_description = ""
  46. if output_format is None:
  47. output_format = ""
  48. if rules_str is None:
  49. rules_str = ""
  50. if few_shot_demo_text_content is None:
  51. few_shot_demo_text_content = ""
  52. if few_shot_demo_key_value_list is None:
  53. few_shot_demo_key_value_list = ""
  54. if task_type not in self.entities:
  55. raise ValueError(
  56. f"task type must be in {self.entities} of GenerateKIEPrompt."
  57. )
  58. self.task_type = task_type
  59. self.task_description = task_description
  60. self.output_format = output_format
  61. self.rules_str = rules_str
  62. self.few_shot_demo_text_content = few_shot_demo_text_content
  63. self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
  64. def generate_prompt(
  65. self,
  66. text_content: str,
  67. key_list: list,
  68. task_description: str = None,
  69. output_format: str = None,
  70. rules_str: str = None,
  71. few_shot_demo_text_content: str = None,
  72. few_shot_demo_key_value_list: str = None,
  73. ) -> str:
  74. """Generates a prompt based on the given parameters.
  75. Args:
  76. text_content (str): The main text content to be used in the prompt.
  77. key_list (list): A list of keywords for information extraction.
  78. task_description (str, optional): A description of the task. Defaults to None.
  79. output_format (str, optional): The desired output format. Defaults to None.
  80. rules_str (str, optional): A string containing rules or instructions. Defaults to None.
  81. few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to None.
  82. few_shot_demo_key_value_list (str, optional): Key-value list for few-shot demos. Defaults to None.
  83. Returns:
  84. str: The generated prompt.
  85. Raises:
  86. ValueError: If the task_type is not supported.
  87. """
  88. if task_description is None:
  89. task_description = self.task_description
  90. if output_format is None:
  91. output_format = self.output_format
  92. if rules_str is None:
  93. rules_str = self.rules_str
  94. if few_shot_demo_text_content is None:
  95. few_shot_demo_text_content = self.few_shot_demo_text_content
  96. if few_shot_demo_key_value_list is None:
  97. few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
  98. prompt = f"""{task_description}{rules_str}{output_format}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
  99. task_type = self.task_type
  100. if task_type == "table_kie_prompt_v1":
  101. prompt += f"""\n结合上面,下面正式开始:\
  102. 表格内容:```{text_content}```\
  103. 关键词列表:[{key_list}]。""".replace(
  104. " ", ""
  105. )
  106. elif task_type == "text_kie_prompt_v1":
  107. prompt += f"""\n结合上面的例子,下面正式开始:\
  108. OCR文字:```{text_content}```\
  109. 关键词列表:[{key_list}]。""".replace(
  110. " ", ""
  111. )
  112. elif task_type == "table_kie_prompt_v2":
  113. prompt += f"""\n结合上面,下面正式开始:\
  114. 表格内容:```{text_content}```\
  115. \n问题列表:{key_list}。""".replace(
  116. " ", ""
  117. )
  118. elif task_type == "text_kie_prompt_v2":
  119. prompt += f"""\n结合上面的例子,下面正式开始:\
  120. OCR文字:```{text_content}```\
  121. \n问题列表:{key_list}。""".replace(
  122. " ", ""
  123. )
  124. return prompt