generate_kie_prompt.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. # Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from .base import BaseGeneratePrompt
  15. class GenerateKIEPrompt(BaseGeneratePrompt):
  16. """Generate KIE Prompt"""
  17. entities = [
  18. "text_kie_prompt",
  19. "table_kie_prompt"
  20. ]
  21. def __init__(self, config):
  22. super().__init__()
  23. task_type = config.get('task_type', "")
  24. task_description = config.get('task_description', "")
  25. output_format = config.get('output_format', "")
  26. rules_str = config.get('rules_str', "")
  27. few_shot_demo_text_content = config.get('few_shot_demo_text_content', "")
  28. few_shot_demo_key_value_list = config.get('few_shot_demo_key_value_list', "")
  29. if task_description is None:
  30. task_description = ""
  31. if output_format is None:
  32. output_format = ""
  33. if rules_str is None:
  34. rules_str = ""
  35. if few_shot_demo_text_content is None:
  36. few_shot_demo_text_content = ""
  37. if few_shot_demo_key_value_list is None:
  38. few_shot_demo_key_value_list = ""
  39. if task_type not in self.entities:
  40. raise ValueError(f"task type must be in {self.entities} of GenerateKIEPrompt.")
  41. self.task_type = task_type
  42. self.task_description = task_description
  43. self.output_format = output_format
  44. self.rules_str = rules_str
  45. self.few_shot_demo_text_content = few_shot_demo_text_content
  46. self.few_shot_demo_key_value_list = few_shot_demo_key_value_list
  47. def generate_prompt(self, text_content,
  48. key_list,
  49. task_description=None,
  50. output_format=None,
  51. rules_str=None,
  52. few_shot_demo_text_content=None,
  53. few_shot_demo_key_value_list=None):
  54. """
  55. args:
  56. return:
  57. """
  58. if task_description is None:
  59. task_description = self.task_description
  60. if output_format is None:
  61. output_format = self.output_format
  62. if rules_str is None:
  63. rules_str = self.rules_str
  64. if few_shot_demo_text_content is None:
  65. few_shot_demo_text_content = self.few_shot_demo_text_content
  66. if few_shot_demo_key_value_list is None:
  67. few_shot_demo_key_value_list = self.few_shot_demo_key_value_list
  68. prompt = f"""{task_description}{output_format}{rules_str}{few_shot_demo_text_content}{few_shot_demo_key_value_list}"""
  69. if self.task_type == "table_kie_prompt":
  70. prompt += f"""\n结合上面,下面正式开始:\
  71. 表格内容:```{text_content}```\
  72. 关键词列表:{key_list}。""".replace(
  73. " ", "")
  74. elif self.task_type == "text_kie_prompt":
  75. prompt += f"""\n结合上面的例子,下面正式开始:\
  76. OCR文字:```{text_content}```\
  77. 关键词列表:{key_list}。""".replace(
  78. " ", "")
  79. else:
  80. raise ValueError(f"{self.task_type} is currently not supported.")
  81. return prompt