- Change `bits` to `self._data_bits` in the `lang == 'auto'` language-detection branch of `ImageDataset`. This fixes the TypeError raised when opening PDF files.
@@ -249,7 +249,7 @@ class ImageDataset(Dataset):
elif lang == 'auto':
from magic_pdf.model.sub_modules.language_detection.utils import \
auto_detect_lang
- self._lang = auto_detect_lang(bits)
+ self._lang = auto_detect_lang(self._data_bits)
logger.info(f'lang: {lang}, detect_lang: {self._lang}')
else:
self._lang = lang