Sfoglia il codice sorgente

excel合并跨页报表,提取表头

zhch158_admin 11 mesi fa
parent
commit
1f5e22470c
1 file modificato, con 6 aggiunte e 3 eliminazioni
  1. 6 3
      zhch/magic_pdf_parse_main_zhch.py

+ 6 - 3
zhch/magic_pdf_parse_main_zhch.py

@@ -107,14 +107,17 @@ def save_report(
                         # 将html转换为dataframe
                         dataframe = pd.read_html(block['lines'][0]['spans'][0]['html'])[0]
                     elif block['type'] == BlockType.TableCaption:
-                        sheet_name = block['lines'][0]['spans'][0]['content']
+                        title = block['lines'][0]['spans'][0]['content']
+                        # 如果title不为空,且title的最后一个字符是“表” 或者 结尾lowcase是“table”
+                        if title is not None and title != '' and (title[-1] == '表' or title.lower().endswith('table')):
+                            sheet_name = title
                 if sheet_name is None:
                     # 向上查找,类型是Title的para_block
                     for title_block in reversed(paras_of_layout[:paras_of_layout.index(para_block)]):
                         if title_block['type'] == BlockType.Title:
                             title = title_block['lines'][0]['spans'][0]['content'].strip()
-                            # 如果title不为空,且title的最后一个字符是“表”
-                            if title is not None and title != '' and title[-1] == '表':
+                            # 如果title不为空,且title的最后一个字符是“表” 或者 结尾lowcase是“table”
+                            if title is not None and title != '' and (title[-1] == '表' or title.lower().endswith('table')):
                                 sheet_name = title
                                 break
                 if dataframe is None: