преди 4 месеца · 804c106c47
--- a/paddlex/inference/models/formula_recognition/result.py
+++ b/paddlex/inference/models/formula_recognition/result.py
@@ -256,25 +256,28 @@ def pdf2img(pdf_path: str, img_path: str, is_padding: bool = False):
 
				         np.ndarray: The resulting image as a NumPy array, or None if the PDF is not single-page.
			
 
				     """
			
 
				     pdfDoc = pdfium.PdfDocument(pdf_path)
			
 
				-    if len(pdfDoc) != 1:
			
 
				-        return None
			
 
				-    for page in pdfDoc:
			
 
				-        rotate = int(0)
			
 
				-        zoom = 2
			
 
				-        img = page.render(scale=zoom, rotation=rotate).to_pil()
			
 
				-        img = img.convert("RGB")
			
 
				-        img = np.array(img)
			
 
				-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
			
 
				-        xywh = crop_white_area(img)
			
 
				-
			
 
				-        if xywh is not None:
			
 
				-            x, y, w, h = xywh
			
 
				-            img = img[y : y + h, x : x + w]
			
 
				-            if is_padding:
			
 
				-                img = cv2.copyMakeBorder(
			
 
				-                    img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
			
 
				-                )
			
 
				-            return img
			
 
				+    try:
			
 
				+        if len(pdfDoc) != 1:
			
 
				+            return None
			
 
				+        for page in pdfDoc:
			
 
				+            rotate = int(0)
			
 
				+            zoom = 2
			
 
				+            img = page.render(scale=zoom, rotation=rotate).to_pil()
			
 
				+            img = img.convert("RGB")
			
 
				+            img = np.array(img)
			
 
				+            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
			
 
				+            xywh = crop_white_area(img)
			
 
				+
			
 
				+            if xywh is not None:
			
 
				+                x, y, w, h = xywh
			
 
				+                img = img[y : y + h, x : x + w]
			
 
				+                if is_padding:
			
 
				+                    img = cv2.copyMakeBorder(
			
 
				+                        img, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=(255, 255, 255)
			
 
				+                    )
			
 
				+                return img
			
 
				+    finally:
			
 
				+        pdfDoc.close()
			
 
				     return None
			
 
				 
			
 
				 
			
--- a/paddlex/inference/serving/infra/utils.py
+++ b/paddlex/inference/serving/infra/utils.py
@@ -188,22 +188,25 @@ def read_pdf(
 
				     page_info_list: List[PDFPageInfo] = []
			
 
				     with _lock:
			
 
				         doc = pdfium.PdfDocument(bytes_)
			
 
				-        for page in doc:
			
 
				-            if max_num_imgs is not None and len(images) >= max_num_imgs:
			
 
				-                break
			
 
				-            # TODO: Do not always use zoom=2.0
			
 
				-            zoom = 2.0
			
 
				-            deg = 0
			
 
				-            image = page.render(scale=zoom, rotation=deg).to_pil()
			
 
				-            image = image.convert("RGB")
			
 
				-            image = np.array(image)
			
 
				-            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
			
 
				-            images.append(image)
			
 
				-            page_info = PDFPageInfo(
			
 
				-                width=image.shape[1],
			
 
				-                height=image.shape[0],
			
 
				-            )
			
 
				-            page_info_list.append(page_info)
			
 
				+        try:
			
 
				+            for page in doc:
			
 
				+                if max_num_imgs is not None and len(images) >= max_num_imgs:
			
 
				+                    break
			
 
				+                # TODO: Do not always use zoom=2.0
			
 
				+                zoom = 2.0
			
 
				+                deg = 0
			
 
				+                image = page.render(scale=zoom, rotation=deg).to_pil()
			
 
				+                image = image.convert("RGB")
			
 
				+                image = np.array(image)
			
 
				+                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
			
 
				+                images.append(image)
			
 
				+                page_info = PDFPageInfo(
			
 
				+                    width=image.shape[1],
			
 
				+                    height=image.shape[0],
			
 
				+                )
			
 
				+                page_info_list.append(page_info)
			
 
				+        finally:
			
 
				+            doc.close()
			
 
				     pdf_info = PDFInfo(
			
 
				         numPages=len(page_info_list),
			
 
				         pages=page_info_list,
			
--- a/paddlex/inference/utils/io/readers.py
+++ b/paddlex/inference/utils/io/readers.py
@@ -290,12 +290,16 @@ class PDFReaderBackend(_BaseReaderBackend):
 
				         self._scale = zoom
			
 
				 
			
 
				     def read_file(self, in_path):
			
 
				-        for page in pdfium.PdfDocument(in_path):
			
 
				-            image = page.render(scale=self._scale, rotation=self._rotation).to_pil()
			
 
				-            image = image.convert("RGB")
			
 
				-            img_cv = np.array(image)
			
 
				-            img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
			
 
				-            yield img_cv
			
 
				+        doc = pdfium.PdfDocument(in_path)
			
 
				+        try:
			
 
				+            for page in doc:
			
 
				+                image = page.render(scale=self._scale, rotation=self._rotation).to_pil()
			
 
				+                image = image.convert("RGB")
			
 
				+                img_cv = np.array(image)
			
 
				+                img_cv = cv2.cvtColor(img_cv, cv2.COLOR_RGB2BGR)
			
 
				+                yield img_cv
			
 
				+        finally:
			
 
				+            doc.close()
			
 
				 
			
 
				 
			
 
				 class TXTReaderBackend(_BaseReaderBackend):