Browse Source

fix(ocr): switch to ch_lite model for Chinese OCR on CPU

- Automatically switch from the ch model to the ch_lite model when running Chinese OCR on a CPU device
- This change improves performance on CPU devices
myhloli 6 months ago
parent
commit
69cdea908d

+ 5 - 0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py

@@ -53,6 +53,11 @@ class PytorchPaddleOCR(TextSystem):
         args = parser.parse_args(args)
 
         self.lang = kwargs.get('lang', 'ch')
+
+        device = get_device()
+        if device == 'cpu' and self.lang == 'ch':
+            self.lang = 'ch_lite'
+
         if self.lang in latin_lang:
             self.lang = 'latin'
         elif self.lang in arabic_lang: