Browse Source

fix(ocr): handle NaN values in recognition scores

- Update predict_rec.py to check for NaN values in recognition results
- Replace NaN scores with 0.0 to ensure stability and consistency
myhloli 7 months ago
parent
commit
c97959e4f5

+ 6 - 0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py

@@ -437,4 +437,10 @@ class TextRecognizer(BaseOCRV20):
                 index += 1
                 pbar.update(current_batch_size)
 
+        # Fix NaN values in recognition results
+        for i in range(len(rec_res)):
+            text, score = rec_res[i]
+            if isinstance(score, float) and math.isnan(score):
+                rec_res[i] = (text, 0.0)
+
         return rec_res, elapse