fix: add Noto fonts installation for Chinese character support in Dockerfile
@@ -1,8 +1,13 @@
# Use the official sglang image
FROM lmsysorg/sglang:v0.4.8.post1-cu126
-# Install libgl for opencv support
-RUN apt-get update && apt-get install -y libgl1 && apt-get clean && rm -rf /var/lib/apt/lists/*
+# Install libgl for opencv support & Noto fonts for Chinese characters.
+# fontconfig is listed explicitly because it provides the fc-cache command
+# used below to rebuild the font cache after the Noto fonts are installed.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        fontconfig \
+        fonts-noto-cjk \
+        fonts-noto-core \
+        libgl1 && \
+    fc-cache -fv && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
# Install mineru latest
RUN python3 -m pip install -U 'mineru[core]' -i https://mirrors.aliyun.com/pypi/simple --break-system-packages
@@ -2,7 +2,12 @@
# Install libgl for opencv support
RUN python3 -m pip install -U 'mineru[core]' --break-system-packages
@@ -89,6 +89,7 @@ def page_model_info_to_page_info(page_model_info, image_dict, page, image_writer
"type": ContentType.INTERLINE_EQUATION,
'score': block['score'],
"bbox": block['bbox'],
+ "content": "",
})
all_bboxes, all_discarded_blocks, footnote_blocks = prepare_block_bboxes(