ソースを参照

feat: add ppocr el, th, en v5

Sidney233 2 ヶ月 前
コミット
3da1ed8443

+ 1 - 1
mineru/cli/gradio_app.py

@@ -145,7 +145,7 @@ devanagari_lang = [
         'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom',  # noqa: E126
         'sa', 'bgc'
 ]
-other_lang = ['ch', 'ch_lite', 'ch_server', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
+other_lang = ['ch', 'ch_lite', 'ch_server', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka', "el", "th"]
 add_lang = ['latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']
 
 # all_lang = ['', 'auto']

+ 76 - 0
mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml

@@ -568,4 +568,80 @@ eslav_PP-OCRv5_rec_infer:
           nrtr_dim: 384
           max_text_length: 25
 
+el_PP-OCRv5_rec_infer:
+  model_type: rec
+  algorithm: SVTR_LCNet
+  Transform:
+  Backbone:
+    name: PPLCNetV3
+    scale: 0.95
+  Head:
+    name: MultiHead
+    out_channels_list:
+      CTCLabelDecode: 356
+    head_list:
+      - CTCHead:
+          Neck:
+            name: svtr
+            dims: 120
+            depth: 2
+            hidden_dims: 120
+            kernel_size: [ 1, 3 ]
+            use_guide: True
+          Head:
+            fc_decay: 0.00001
+      - NRTRHead:
+          nrtr_dim: 384
+          max_text_length: 25
 
+th_PP-OCRv5_rec_infer:
+  model_type: rec
+  algorithm: SVTR_LCNet
+  Transform:
+  Backbone:
+    name: PPLCNetV3
+    scale: 0.95
+  Head:
+    name: MultiHead
+    out_channels_list:
+      CTCLabelDecode: 526
+    head_list:
+      - CTCHead:
+          Neck:
+            name: svtr
+            dims: 120
+            depth: 2
+            hidden_dims: 120
+            kernel_size: [ 1, 3 ]
+            use_guide: True
+          Head:
+            fc_decay: 0.00001
+      - NRTRHead:
+          nrtr_dim: 384
+          max_text_length: 25
+
+en_PP-OCRv5_rec_infer:
+  model_type: rec
+  algorithm: SVTR_LCNet
+  Transform:
+  Backbone:
+    name: PPLCNetV3
+    scale: 0.95
+  Head:
+    name: MultiHead
+    out_channels_list:
+      CTCLabelDecode: 438
+    head_list:
+      - CTCHead:
+          Neck:
+            name: svtr
+            dims: 120
+            depth: 2
+            hidden_dims: 120
+            kernel_size: [ 1, 3 ]
+            use_guide: True
+          Head:
+            fc_decay: 0.00001
+      - NRTRHead:
+          nrtr_dim: 384
+          max_text_length: 25

+ 354 - 0
mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_el_dict.txt

@@ -0,0 +1,354 @@
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+@
+[
+]
+_
+`
+^
+~
+™
+¿
+‖
+‗
+‘
+’
+‚
+‛
+“
+”
+„
+†
+‡
+…
+‰
+′
+″
+‴
+‵
+‶
+‷
+‹
+›
+‼
+‽
+‾
+€
+₤
+₡
+₽
+₴
+₹
+₿
+∓
+≈
+≠
+≡
+≤
+≥
+∑
+∏
+∫
+∬
+∭
+∮
+∯
+∰
+√
+∛
+∜
+∝
+∞
+∂
+∇
+∴
+∵
+∷
+∠
+∡
+∢
+∟
+∦
+∩
+∪
+∗
+∖
+∥
+∧
+∨
+⊂
+⊃
+⊥
+∈
+∋
+∉
+∅
+↑
+→
+↓
+↔
+↕
+←
+⇒
+⇐
+⇔
+∀
+∃
+∄
+Ⅰ
+Ⅱ
+Ⅲ
+Ⅳ
+Ⅴ
+Ⅵ
+Ⅶ
+Ⅷ
+Ⅸ
+Ⅹ
+Ⅺ
+Ⅻ
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+ⅵ
+ⅶ
+ⅷ
+ⅸ
+ⅹ
+ⅺ
+ⅻ
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+❶
+❷
+❸
+❹
+❺
+❻
+❼
+❽
+❾
+❿
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+ο

+ 436 - 0
mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_en_dict.txt

@@ -0,0 +1,436 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+:
+;
+<
+=
+>
+?
+@
+[
+\
+]
+_
+`
+{
+|
+}
+^
+~
+℉
+№
+Ω
+℮
+™
+∆
+✓
+✔
+✗
+✘
+✕
+☑
+☒
+●
+▪
+▫
+◼
+▶
+◀
+⬆
+¿
+‐
+‑
+‒
+—
+―
+‖
+‗
+‘
+’
+‚
+‛
+“
+”
+„
+‟
+†
+‡
+‣
+․
+…
+‧
+‰
+‴
+‵
+‶
+‷
+‸
+‹
+›
+※
+‼
+‽
+‾
+−
+₤
+₡
+₹
+₽
+₴
+₿
+€
+Ⅰ
+Ⅱ
+Ⅲ
+Ⅳ
+Ⅴ
+Ⅵ
+Ⅶ
+Ⅷ
+Ⅸ
+Ⅹ
+Ⅺ
+Ⅻ
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+ⅵ
+ⅶ
+ⅷ
+ⅸ
+ⅹ
+ⅺ
+ⅻ
+➀
+➁
+➂
+➃
+➄
+➅
+➆
+➇
+➈
+➉
+➊
+➋
+➌
+➍
+➎
+➏
+➐
+➑
+➒
+➓
+❶
+❷
+❸
+❹
+❺
+❻
+❼
+❽
+❾
+❿
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+↑
+→
+↓
+↕
+←
+↔
+⇒
+⇐
+⇔
+∀
+∃
+∄
+∴
+∵
+∝
+∞
+∩
+∪
+∂
+∫
+∬
+∭
+∮
+∯
+∰
+∑
+∏
+√
+∛
+∜
+∱
+∲
+∳
+∶
+∷
+∼
+∖
+∗
+≈
+≠
+≡
+≤
+≥
+⊂
+⊃
+⊥
+⊾
+⊿
+□
+∥
+∋
+′
+″
+ÿ
+ο
+Å
+ℏ
+⌀
+⍺
+⍵
+𝑢
+𝜓
+०
+‥
+︽
+﹥
+•
+∕
+∙
+⋅
+∓
+∟
+∠
+∡
+∢
+℧
+☺

+ 524 - 0
mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_th_dict.txt

@@ -0,0 +1,524 @@
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+.
+/
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+;
+<
+=
+>
+?
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+[
+]
+_
+`
+a
+b
+c
+d
+e
+f
+g
+h
+i
+j
+k
+l
+m
+n
+o
+p
+q
+r
+s
+t
+u
+v
+w
+x
+y
+z
+‥
+{
+}
+\
+|
+@
+^
+~
+∕
+∙
+⋅
+∓
+∩
+∪
+□
+←
+↔
+⇒
+⇐
+⇔
+∀
+∃
+∄
+∴
+∵
+∝
+∞
+⊥
+∟
+∠
+∡
+∢
+′
+″
+∥
+⊾
+⊿
+∂
+∫
+∬
+∭
+∮
+∯
+∰
+∑
+∏
+√
+∛
+∜
+∱
+∲
+∳
+∶
+∷
+∼
+℉
+℧
+Å
+⌀
+ℏ
+⅀
+⍺
+⍵
+€
+₿
+Ⅰ
+Ⅱ
+Ⅲ
+Ⅳ
+Ⅴ
+Ⅵ
+Ⅶ
+Ⅷ
+Ⅸ
+Ⅹ
+Ⅺ
+Ⅻ
+ⅰ
+ⅱ
+ⅲ
+ⅳ
+ⅴ
+ⅵ
+ⅶ
+ⅷ
+ⅸ
+ⅹ
+ⅺ
+ⅻ
+➀
+➁
+➂
+➃
+➄
+➅
+➆
+➇
+➈
+➉
+➊
+➋
+➌
+➍
+➎
+➏
+➐
+➑
+➒
+➓
+❶
+❷
+❸
+❹
+❺
+❻
+❼
+❽
+❾
+❿
+①
+②
+③
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+●
+▶
+𝑢
+︽
+–
+﹥
+𝜓
+•
+∋
+०
+⬆
+Ạ
+◀
+
+▫
+︾
+ÿ
+¿
+‐
+‑
+‒
+—
+―
+‖
+‗
+‘
+’
+‚
+‛
+“
+”
+„
+‟
+†
+‡
+‣
+․
+…
+‧
+‰
+‴
+‵
+‶
+‷
+‸
+‹
+›
+※
+‼
+‽
+‾
+₤
+₡
+₹
+−
+∖
+∗
+≈
+≠
+≡
+≤
+≥
+⊂
+⊃
+↑
+→
+↓
+↕
+™
+Ω
+℮
+∆
+✓
+✗
+✘
+▪
+◼
+✔
+✕
+☑
+☒
+№
+₽
+₴
+ο
+ก
+ข
+ค
+ฅ
+ฆ
+ง
+จ
+ฉ
+ช
+ซ
+ฌ
+ญ
+ฎ
+ฏ
+ฐ
+ฑ
+ฒ
+ณ
+ด
+ต
+ถ
+ท
+ธ
+น
+บ
+ป
+ผ
+ฝ
+พ
+ฟ
+ภ
+ม
+ย
+ร
+ฤ
+ล
+ฦ
+ว
+ศ
+ษ
+ส
+ห
+ฬ
+อ
+ฮ
+ฯ
+ะ
+ั
+า
+ำ
+ิ
+ี
+ึ
+ื
+ุ
+ู
+ฺ
+฿
+เ
+แ
+โ
+ใ
+ไ
+ๅ
+ๆ
+็
+่
+้
+๊
+๋
+์
+ํ
+๐
+๑
+๒
+๓
+๔
+๕
+๖
+๗
+๘
+๙
+๚

+ 14 - 2
mineru/model/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml

@@ -19,7 +19,7 @@ lang:
     det: ch_PP-OCRv5_det_infer.pth
     rec: ch_PP-OCRv4_rec_server_doc_infer.pth
     dict: ppocrv4_doc_dict.txt
-  en:
+  en_v4:
     det: en_PP-OCRv3_det_infer.pth
     rec: en_PP-OCRv4_rec_infer.pth
     dict: en_dict.txt
@@ -66,4 +66,16 @@ lang:
   east_slavic:
     det: ch_PP-OCRv5_det_infer.pth
     rec: eslav_PP-OCRv5_rec_infer.pth
-    dict: ppocrv5_eslav_dict.txt
+    dict: ppocrv5_eslav_dict.txt
+  el:
+    det: ch_PP-OCRv5_det_infer.pth
+    rec: el_PP-OCRv5_rec_infer.pth
+    dict: ppocrv5_el_dict.txt
+  th:
+    det: ch_PP-OCRv5_det_infer.pth
+    rec: th_PP-OCRv5_rec_infer.pth
+    dict: ppocrv5_th_dict.txt
+  en:
+    det: ch_PP-OCRv5_det_infer.pth
+    rec: en_PP-OCRv5_rec_infer.pth
+    dict: ppocrv5_en_dict.txt