markdown_utils.py 944 B

12345678910111213141516171819202122232425262728293031
  1. import re
  2. def escape_special_markdown_char(pymu_blocks):
  3. """
  4. 转义正文里对markdown语法有特殊意义的字符
  5. """
  6. special_chars = ["*", "`", "~", "$"]
  7. for blk in pymu_blocks:
  8. for line in blk['lines']:
  9. for span in line['spans']:
  10. for char in special_chars:
  11. span_text = span['text']
  12. span_type = span.get("_type", None)
  13. if span_type in ['inline-equation', 'interline-equation']:
  14. continue
  15. elif span_text:
  16. span['text'] = span['text'].replace(char, "\\" + char)
  17. return pymu_blocks
  18. def ocr_escape_special_markdown_char(content):
  19. """
  20. 转义正文里对markdown语法有特殊意义的字符
  21. """
  22. special_chars = ["*", "`", "~", "$"]
  23. for char in special_chars:
  24. content = content.replace(char, "\\" + char)
  25. return content