|
@@ -4,7 +4,6 @@ import enum
|
|
|
from pydantic import BaseModel, Field
|
|
from pydantic import BaseModel, Field
|
|
|
from PIL import Image
|
|
from PIL import Image
|
|
|
|
|
|
|
|
-
|
|
|
|
|
class SupportedPdfParseMethod(enum.Enum):
|
|
class SupportedPdfParseMethod(enum.Enum):
|
|
|
OCR = 'ocr'
|
|
OCR = 'ocr'
|
|
|
TXT = 'txt'
|
|
TXT = 'txt'
|
|
@@ -17,7 +16,7 @@ class PageInfo(BaseModel):
|
|
|
h: float = Field(description='the height of page')
|
|
h: float = Field(description='the height of page')
|
|
|
|
|
|
|
|
|
|
|
|
|
-def fitz_doc_to_image(doc, target_dpi=200, origin_dpi=None) -> dict:
|
|
|
|
|
|
|
+def fitz_doc_to_image(doc, target_dpi=200, origin_dpi=None) -> Image.Image:
|
|
|
"""Convert fitz.Document to image, Then convert the image to numpy array.
|
|
"""Convert fitz.Document to image, Then convert the image to numpy array.
|
|
|
|
|
|
|
|
Args:
|
|
Args:
|