server.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. import os
  2. import base64
  3. import tempfile
  4. from pathlib import Path
  5. import litserve as ls
  6. from fastapi import HTTPException
  7. from loguru import logger
  8. from mineru.cli.common import do_parse, read_fn
  9. from mineru.utils.config_reader import get_device
  10. from mineru.utils.model_utils import get_vram
  11. from _config_endpoint import config_endpoint
  12. class MinerUAPI(ls.LitAPI):
  13. def __init__(self, output_dir='/tmp'):
  14. super().__init__()
  15. self.output_dir = output_dir
  16. def setup(self, device):
  17. """Setup environment variables exactly like MinerU CLI does"""
  18. logger.info(f"Setting up on device: {device}")
  19. if os.getenv('MINERU_DEVICE_MODE', None) == None:
  20. os.environ['MINERU_DEVICE_MODE'] = device if device != 'auto' else get_device()
  21. device_mode = os.environ['MINERU_DEVICE_MODE']
  22. if os.getenv('MINERU_VIRTUAL_VRAM_SIZE', None) == None:
  23. if device_mode.startswith("cuda") or device_mode.startswith("npu"):
  24. vram = round(get_vram(device_mode))
  25. os.environ['MINERU_VIRTUAL_VRAM_SIZE'] = str(vram)
  26. else:
  27. os.environ['MINERU_VIRTUAL_VRAM_SIZE'] = '1'
  28. logger.info(f"MINERU_VIRTUAL_VRAM_SIZE: {os.environ['MINERU_VIRTUAL_VRAM_SIZE']}")
  29. if os.getenv('MINERU_MODEL_SOURCE', None) in ['huggingface', None]:
  30. config_endpoint()
  31. logger.info(f"MINERU_MODEL_SOURCE: {os.environ['MINERU_MODEL_SOURCE']}")
  32. def decode_request(self, request):
  33. """Decode file and options from request"""
  34. file_b64 = request['file']
  35. options = request.get('options', {})
  36. file_bytes = base64.b64decode(file_b64)
  37. with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as temp:
  38. temp.write(file_bytes)
  39. temp_file = Path(temp.name)
  40. return {
  41. 'input_path': str(temp_file),
  42. 'backend': options.get('backend', 'pipeline'),
  43. 'method': options.get('method', 'auto'),
  44. 'lang': options.get('lang', 'ch'),
  45. 'formula_enable': options.get('formula_enable', True),
  46. 'table_enable': options.get('table_enable', True),
  47. 'start_page_id': options.get('start_page_id', 0),
  48. 'end_page_id': options.get('end_page_id', None),
  49. 'server_url': options.get('server_url', None),
  50. }
  51. def predict(self, inputs):
  52. """Call MinerU's do_parse - same as CLI"""
  53. input_path = inputs['input_path']
  54. output_dir = Path(self.output_dir)
  55. try:
  56. os.makedirs(output_dir, exist_ok=True)
  57. file_name = Path(input_path).stem
  58. pdf_bytes = read_fn(Path(input_path))
  59. do_parse(
  60. output_dir=str(output_dir),
  61. pdf_file_names=[file_name],
  62. pdf_bytes_list=[pdf_bytes],
  63. p_lang_list=[inputs['lang']],
  64. backend=inputs['backend'],
  65. parse_method=inputs['method'],
  66. formula_enable=inputs['formula_enable'],
  67. table_enable=inputs['table_enable'],
  68. server_url=inputs['server_url'],
  69. start_page_id=inputs['start_page_id'],
  70. end_page_id=inputs['end_page_id']
  71. )
  72. return str(output_dir/Path(input_path).stem)
  73. except Exception as e:
  74. logger.error(f"Processing failed: {e}")
  75. raise HTTPException(status_code=500, detail=str(e))
  76. finally:
  77. # Cleanup temp file
  78. if Path(input_path).exists():
  79. Path(input_path).unlink()
  80. def encode_response(self, response):
  81. return {'output_dir': response}
  82. if __name__ == '__main__':
  83. server = ls.LitServer(
  84. MinerUAPI(output_dir='/tmp/mineru_output'),
  85. accelerator='auto',
  86. devices='auto',
  87. workers_per_device=1,
  88. timeout=False
  89. )
  90. logger.info("Starting MinerU server on port 8000")
  91. server.run(port=8000, generate_client_file=False)