generate_kie_prompt.py 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. # copyright (c) 2024 PaddlePaddle Authors. All Rights Reserve.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from .base import BaseGeneratePrompt
  15. from typing import Dict
  16. class GenerateKIEPrompt(BaseGeneratePrompt):
  17. """Generate KIE Prompt"""
  18. entities = ["text_kie_prompt", "table_kie_prompt"]
  19. def __init__(self, config: Dict) -> None:
  20. """Initializes the GenerateKIEPrompt instance with the given configuration.
  21. Args:
  22. config (Dict): A dictionary containing configuration settings.
  23. - task_type (str): The type of task to generate a prompt for, in the support entities list.
  24. - task_description (str, optional): A description of the task. Defaults to an empty string.
  25. - output_format (str, optional): The desired output format. Defaults to an empty string.
  26. - rules_str (str, optional): A string representing rules for the task. Defaults to an empty string.
  27. - few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to an empty string.
  28. - few_shot_demo_key_value_list (str, optional): A key-value list for few-shot demos. Defaults to an empty string.
  29. Raises:
  30. ValueError: If the task type is not in the allowed entities for GenerateKIEPrompt.
  31. """
  32. super().__init__()
  33. task_type = config.get("task_type", "")
  34. task_description = config.get("task_description", "")
  35. output_format = config.get("output_format", "")
  36. rules_str = config.get("rules_str", "")
  37. few_shot_demo_text_content = config.get("few_shot_demo_text_content", "")
  38. few_shot_demo_key_value_list = config.get("few_shot_demo_key_value_list", "")
  39. if task_description is None:
  40. task_description = ""
  41. if output_format is None:
  42. output_format = ""
  43. if rules_str is None:
  44. rules_str = ""
  45. if few_shot_demo_text_content is None:
  46. few_shot_demo_text_content = ""
  47. if few_shot_demo_key_value_list is None:
  48. few_shot_demo_key_value_list = ""
  49. if task_type not in self.entities:
  50. raise ValueError(
  51. f"task type must be in {self.entities} of GenerateKIEPrompt."
  52. )
  53. self.task_type = task_type
  54. self.task_description = task_description
  55. self.output_format = output_format
  56. self.rules_str = rules_str
  57. self.few_shot_demo_text_content = few_shot_demo_text_content
  58. self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
  59. def generate_prompt(
  60. self,
  61. text_content: str,
  62. key_list: list,
  63. task_description: str = None,
  64. output_format: str = None,
  65. rules_str: str = None,
  66. few_shot_demo_text_content: str = None,
  67. few_shot_demo_key_value_list: str = None,
  68. ) -> str:
  69. """Generates a prompt based on the given parameters.
  70. Args:
  71. text_content (str): The main text content to be used in the prompt.
  72. key_list (list): A list of keywords for information extraction.
  73. task_description (str, optional): A description of the task. Defaults to None.
  74. output_format (str, optional): The desired output format. Defaults to None.
  75. rules_str (str, optional): A string containing rules or instructions. Defaults to None.
  76. few_shot_demo_text_content (str, optional): Text content for few-shot demos. Defaults to None.
  77. few_shot_demo_key_value_list (str, optional): Key-value list for few-shot demos. Defaults to None.
  78. Returns:
  79. str: The generated prompt.
  80. Raises:
  81. ValueError: If the task_type is not supported.
  82. """
  83. if task_description is None:
  84. task_description = self.task_description
  85. if output_format is None:
  86. output_format = self.output_format
  87. if rules_str is None:
  88. rules_str = self.rules_str
  89. if few_shot_demo_text_content is None:
  90. few_shot_demo_text_content = self.few_shot_demo_text_content
  91. if few_shot_demo_key_value_list is None:
  92. few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
  93. prompt = f"""{task_description}{rules_str}{output_format}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
  94. if self.task_type == "table_kie_prompt":
  95. # prompt += f"""\n结合上面,下面正式开始:\
  96. # 表格内容:```{text_content}```\
  97. # \n问题列表:{key_list}。""".replace(
  98. # " ", ""
  99. # )
  100. prompt += f"""\n结合上面,下面正式开始:\
  101. 表格内容:```{text_content}```\
  102. \n关键词列表:{key_list}。""".replace(
  103. " ", ""
  104. )
  105. elif self.task_type == "text_kie_prompt":
  106. # prompt += f"""\n结合上面的例子,下面正式开始:\
  107. # OCR文字:```{text_content}```\
  108. # \n问题列表:{key_list}。""".replace(
  109. # " ", ""
  110. # )
  111. prompt += f"""\n结合上面的例子,下面正式开始:\
  112. OCR文字:```{text_content}```\
  113. \n关键词列表:{key_list}。""".replace(
  114. " ", ""
  115. )
  116. else:
  117. raise ValueError(f"{self.task_type} is currently not supported.")
  118. return prompt