Broncode bekijken

修复init错误

赵小蒙 1 jaar geleden
bovenliggende
commit
016cde3ece
7 gewijzigde bestanden met 6 toevoegingen en 7 verwijderingen
  1. 1 1
      .gitignore
  2. 0 1
      demo/ocr_demo.py
  3. 2 2
      magic_pdf/pipeline.py
  4. 1 1
      magic_pdf/pipeline_txt.py
  5. 0 0
      magic_pdf/spark/__init__.py
  6. 1 1
      magic_pdf/spark/s3.py
  7. 1 1
      tests/test_commons.py

+ 1 - 1
.gitignore

@@ -34,4 +34,4 @@ tmp
 ocr_demo
 
 /app/common/__init__.py
-/magic_pdf/spark/__init__.py
+/magic_pdf/config/__init__.py

+ 0 - 1
demo/ocr_demo.py

@@ -12,7 +12,6 @@ from magic_pdf.dict2md.ocr_mkcontent import (
     make_standard_format_with_para
 )
 from magic_pdf.libs.commons import join_path, read_file
-from magic_pdf.pdf_parse_by_ocr import parse_pdf_by_ocr
 
 
 def save_markdown(markdown_text, input_filepath):

+ 2 - 2
magic_pdf/pipeline.py

@@ -19,9 +19,9 @@ from magic_pdf.filter.pdf_meta_scan import pdf_meta_scan
 from loguru import logger
 
 from magic_pdf.pdf_parse_for_train import parse_pdf_for_train
-from magic_pdf.spark.base import exception_handler, get_data_source, get_bookname, get_pdf_bytes
+from magic_pdf.spark import exception_handler, get_data_source
 from magic_pdf.train_utils.convert_to_train_format import convert_to_train_format
-from magic_pdf.spark.s3 import get_s3_config, get_s3_client
+from magic_pdf.spark import get_s3_config, get_s3_client
 
 
 

+ 1 - 1
magic_pdf/pipeline_txt.py

@@ -8,7 +8,7 @@ from loguru import logger
 from magic_pdf.dict2md.mkcontent import mk_mm_markdown, mk_universal_format
 from magic_pdf.libs.commons import join_path
 from magic_pdf.libs.json_compressor import JsonCompressor
-from magic_pdf.spark.base import exception_handler, get_data_source
+from magic_pdf.spark import exception_handler, get_data_source
 
 
 def txt_pdf_to_standard_format(jso: dict, debug_mode=False) -> dict:

+ 0 - 0
magic_pdf/spark/__init__.py


+ 1 - 1
magic_pdf/spark/s3.py

@@ -8,7 +8,7 @@ from typing import List, Union
 try:
     from app.config import s3_buckets, s3_clusters, get_cluster_name, s3_users
 except ImportError:
-    from magic_pdf.spark import s3_buckets, s3_clusters, get_cluster_name, s3_users
+    from magic_pdf.config import s3_buckets, s3_clusters, get_cluster_name, s3_users
 
 __re_s3_path = re.compile("^s3a?://([^/]+)(?:/(.*))?$")
 def get_s3_config(path: Union[str, List[str]], outside=False):

+ 1 - 1
tests/test_commons.py

@@ -3,7 +3,7 @@ import json
 import os
 from magic_pdf.libs.commons import fitz
 
-from magic_pdf.spark.s3 import get_s3_config, get_s3_client
+from magic_pdf.spark import get_s3_config, get_s3_client
 from magic_pdf.libs.commons import join_path, json_dump_path, read_file, parse_bucket_key
 from loguru import logger