Kaynağa Gözat

添加模型解析类

kernel.h@qq.com 1 yıl önce
ebeveyn
işleme
0a3afbf053
2 değiştirilmiş dosya ile 51 ekleme ve 2 silme
  1. 2 2
      magic_pdf/cli/magicpdf.py
  2. 49 0
      magic_pdf/model/magic_model.py

+ 2 - 2
magic_pdf/cli/magicpdf.py

@@ -119,7 +119,7 @@ def json_command(json, method):
 
     _do_parse(
         pdf_data,
-        jso["doc_layout_result"],
+        jso,
         method,
         local_image_rw,
         local_md_rw,
@@ -158,7 +158,7 @@ def pdf_command(pdf, model, method):
     )
     _do_parse(
         pdf_data,
-        jso["doc_layout_result"],
+        jso,
         method,
         local_image_rw,
         local_md_rw,

+ 49 - 0
magic_pdf/model/magic_model.py

@@ -0,0 +1,49 @@
+
+
+# Skeleton of the model-result parsing class: most accessors are still stubs
+# (`pass`) tagged with the person responsible for implementing them.
+class MagicModel():
+    """
+    Every function returns an empty list when it does not find any elements.
+
+    """
+    # NOTE(review): declared without `self`, yet the body assigns to
+    # self.__model_list; `xx` is a placeholder that is not defined anywhere
+    # in this block.
+    def __fix_axis():
+        # TODO: compute
+        self.__model_list = xx
+        
+    # NOTE(review): `self` is missing from the parameter list although the
+    # body uses it, and `Page` is not defined or imported in the lines shown.
+    # __fix_axis() is invoked before __page is assigned.
+    def __init__(model_list:list, page:Page):
+        self.__model_list = model_list
+        self.__fix_axis()
+        self.__page = page
+        
+    def get_imgs(self, page_no:int): # @Xu Rui
+        
+        # NOTE(review): return_lst is never used, and x0/y0/x1/y1 are
+        # placeholders not defined in this block. The literals below sketch
+        # the intended result shape: a list of dicts with an "img" entry
+        # (bbox) and a "caption" entry (bbox plus text).
+        return_lst = []
+        img = {
+        "bbox":[x0,y0,x1,y1]
+        }
+        img_caption = {
+        "bbox":[x0,y0,x1,y1],
+        "text":"",
+        }
+        return [{"img":img, "caption":img_caption},]
+        
+    def get_tables(self, page_no:int) ->list: # three bboxes: caption, table body, table-note
+        pass # Xu Rui
+        
+    # NOTE(review): annotated as returning list, but the return statement
+    # yields a tuple of two names that are not defined in this block.
+    def get_equations(self, page_no:int)->list: # has coordinates and also text
+        return inline_equations, interline_equations  # @Kaiwen
+        
+    def get_discarded(self, page_no:int)->list: # in-house model; coordinates only
+        pass # @Kaiwen
+        
+    def get_text_blocks(self, page_no:int)->list: # produced by the in-house model; coordinates only, no text
+        pass # @Kaiwen
+        
+    def get_title_blocks(self, page_no:int)->list: # in-house model; coordinates only, no text
+        pass # @Kaiwen
+        
+    def get_ocr_text(self, page_no:int)->list: # produced by paddle; has both text and coordinates
+        pass  # @Xiaomeng
+        
+    def get_ocr_spans(self, page_no:int)->list:
+        pass   # @Xiaomeng
+