Selaa lähdekoodia

refactor(ocr): comment out det_count update and update OCR models

- Comment out the line that updates det_count in batch_analyze.py
- Add a new OCR model configuration for Chinese (ch_lite) in models_config.yml
- Update the Chinese (ch) OCR model configuration to use a different recognition model (ch_PP-OCRv4_rec_server_infer.pth)
myhloli 7 kuukautta sitten
vanhempi
commit
f8323ae07c

+ 2 - 1
magic_pdf/model/batch_analyze.py

@@ -143,7 +143,8 @@ class BatchAnalyze:
                 if ocr_res:
                     ocr_result_list = get_ocr_result_list(ocr_res, useful_list, ocr_res_list_dict['ocr_enable'], new_image, _lang)
                     ocr_res_list_dict['layout_res'].extend(ocr_result_list)
-            det_count += len(ocr_res_list_dict['ocr_res_list'])
+
+            # det_count += len(ocr_res_list_dict['ocr_res_list'])
         # logger.info(f'ocr-det time: {round(time.time()-det_start, 2)}, image num: {det_count}')
 
 

+ 5 - 1
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml

@@ -1,8 +1,12 @@
 lang:
-  ch:
+  ch_lite:
     det: ch_PP-OCRv3_det_infer.pth
     rec: ch_PP-OCRv4_rec_infer.pth
     dict: ppocr_keys_v1.txt
+  ch:
+    det: ch_PP-OCRv3_det_infer.pth
+    rec: ch_PP-OCRv4_rec_server_infer.pth
+    dict: ppocr_keys_v1.txt
   en:
     det: en_PP-OCRv3_det_infer.pth
     rec: en_PP-OCRv4_rec_infer.pth