Sfoglia il codice sorgente

fix(table): handle empty OCR result in rapidtable

- Add check for empty OCR result when using PaddleOCR model
- Assign None to ocr_result if no text is detected, so the result-reformatting comprehension is skipped instead of iterating over an empty or None result
myhloli 10 mesi fa
parent
commit
12caa7845d

+ 4 - 1
magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py

@@ -27,8 +27,11 @@ class RapidTableModel(object):
         elif self.ocr_model_name == "PaddleOCR":
             bgr_image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
             ocr_result = self.ocr_engine.ocr(bgr_image)[0]
-            ocr_result = [[item[0], item[1][0], item[1][1]] for item in ocr_result if
+            if ocr_result:
+                ocr_result = [[item[0], item[1][0], item[1][1]] for item in ocr_result if
                           len(item) == 2 and isinstance(item[1], tuple)]
+            else:
+                ocr_result = None
         else:
             logger.error("OCR model not supported")
             ocr_result = None