|
|
@@ -1,3 +1,5 @@
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
from magika import Magika
|
|
|
|
|
|
|
|
|
@@ -10,11 +12,17 @@ def guess_language_by_text(code):
|
|
|
return lang if lang != "unknown" else DEFAULT_LANG
|
|
|
|
|
|
|
|
|
-def guess_suffix_by_bytes(file_bytes) -> str:
|
|
|
+def guess_suffix_by_bytes(file_bytes, file_path=None) -> str:
|
|
|
suffix = magika.identify_bytes(file_bytes).prediction.output.label
|
|
|
+ if file_path and suffix in ["ai"] and Path(file_path).suffix.lower() in [".pdf"]:
|
|
|
+ suffix = "pdf"
|
|
|
return suffix
|
|
|
|
|
|
|
|
|
def guess_suffix_by_path(file_path) -> str:
|
|
|
+ if not isinstance(file_path, Path):
|
|
|
+ file_path = Path(file_path)
|
|
|
suffix = magika.identify_path(file_path).prediction.output.label
|
|
|
+ if suffix in ["ai"] and file_path.suffix.lower() in [".pdf"]:
|
|
|
+ suffix = "pdf"
|
|
|
return suffix
|