Browse Source

feat: add support for additional language options in OCR processing

myhloli 4 months ago
parent
commit
359110e37d
1 changed file with 2 additions and 1 deletion
  1. 2 1
      mineru/cli/client.py

+ 2 - 1
mineru/cli/client.py

@@ -60,7 +60,8 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     '-l',
     '-l',
     '--lang',
     '--lang',
     'lang',
     'lang',
-    type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']),
+    type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka',
+                       'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']),
     help="""
     help="""
     Input the languages in the pdf (if known) to improve OCR accuracy.  Optional.
     Input the languages in the pdf (if known) to improve OCR accuracy.  Optional.
     Without languages specified, 'ch' will be used by default.
     Without languages specified, 'ch' will be used by default.