|
@@ -159,9 +159,12 @@ devanagari_lang = [
|
|
|
'sa', 'bgc'
|
|
'sa', 'bgc'
|
|
|
]
|
|
]
|
|
|
other_lang = ['ch', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
|
|
other_lang = ['ch', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
|
|
|
|
|
+add_lang = ['latin', 'arabic', 'cyrillic', 'devanagari']
|
|
|
|
|
|
|
|
-all_lang = ['', 'auto']
|
|
|
|
|
-all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
|
|
|
|
|
|
|
+# all_lang = ['', 'auto']
|
|
|
|
|
+all_lang = []
|
|
|
|
|
+# all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
|
|
|
|
|
+all_lang.extend([*other_lang, *add_lang])
|
|
|
|
|
|
|
|
|
|
|
|
|
def to_pdf(file_path):
|
|
def to_pdf(file_path):
|
|
@@ -192,8 +195,8 @@ if __name__ == '__main__':
|
|
|
file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'])
|
|
file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'])
|
|
|
max_pages = gr.Slider(1, 20, 10, step=1, label='Max convert pages')
|
|
max_pages = gr.Slider(1, 20, 10, step=1, label='Max convert pages')
|
|
|
with gr.Row():
|
|
with gr.Row():
|
|
|
- layout_mode = gr.Dropdown(['layoutlmv3', 'doclayout_yolo'], label='Layout model', value='doclayout_yolo')
|
|
|
|
|
- language = gr.Dropdown(all_lang, label='Language', value='auto')
|
|
|
|
|
|
|
+ layout_mode = gr.Dropdown(['doclayout_yolo'], label='Layout model', value='doclayout_yolo')
|
|
|
|
|
+ language = gr.Dropdown(all_lang, label='Language', value='ch')
|
|
|
with gr.Row():
|
|
with gr.Row():
|
|
|
formula_enable = gr.Checkbox(label='Enable formula recognition', value=True)
|
|
formula_enable = gr.Checkbox(label='Enable formula recognition', value=True)
|
|
|
is_ocr = gr.Checkbox(label='Force enable OCR', value=False)
|
|
is_ocr = gr.Checkbox(label='Force enable OCR', value=False)
|