guess_suffix_or_lang.py 540 B

1234567891011121314151617181920
  1. from magika import Magika
# Fallback label returned by guess_language_by_text when Magika reports
# the content as "unknown".
DEFAULT_LANG = "txt"
# Single Magika instance created at import time and shared by every helper
# in this module.
magika = Magika()
  4. def guess_language_by_text(code):
  5. codebytes = code.encode(encoding="utf-8")
  6. lang = magika.identify_bytes(codebytes).prediction.output.label
  7. return lang if lang != "unknown" else DEFAULT_LANG
  8. def guess_suffix_by_bytes(file_bytes) -> str:
  9. suffix = magika.identify_bytes(file_bytes).prediction.output.label
  10. return suffix
  11. def guess_suffix_by_path(file_path) -> str:
  12. suffix = magika.identify_path(file_path).prediction.output.label
  13. return suffix