client.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. import base64
  2. import os
  3. from loguru import logger
  4. import asyncio
  5. import aiohttp
  6. async def mineru_parse_async(session, file_path, url='http://127.0.0.1:8000/predict', **options):
  7. """
  8. Asynchronous version of the parse function.
  9. """
  10. try:
  11. # Asynchronously read and encode the file
  12. with open(file_path, 'rb') as f:
  13. file_b64 = base64.b64encode(f.read()).decode('utf-8')
  14. payload = {
  15. 'file': file_b64,
  16. 'options': options
  17. }
  18. # Use the aiohttp session to send the request
  19. async with session.post(url, json=payload) as response:
  20. if response.status == 200:
  21. result = await response.json()
  22. logger.info(f"✅ Processed: {file_path} -> {result.get('output_dir', 'N/A')}")
  23. return result
  24. else:
  25. error_text = await response.text()
  26. logger.error(f"❌ Server error for {file_path}: {error_text}")
  27. return {'error': error_text}
  28. except Exception as e:
  29. logger.error(f"❌ Failed to process {file_path}: {e}")
  30. return {'error': str(e)}
  31. async def main():
  32. """
  33. Main function to run all parsing tasks concurrently.
  34. """
  35. test_files = [
  36. '../../demo/pdfs/demo1.pdf',
  37. '../../demo/pdfs/demo2.pdf',
  38. '../../demo/pdfs/demo3.pdf',
  39. '../../demo/pdfs/small_ocr.pdf',
  40. ]
  41. test_files = [os.path.join(os.path.dirname(__file__), f) for f in test_files]
  42. existing_files = [f for f in test_files if os.path.exists(f)]
  43. if not existing_files:
  44. logger.warning("No test files found.")
  45. return
  46. # Create an aiohttp session to be reused across requests
  47. async with aiohttp.ClientSession() as session:
  48. # === Basic Processing ===
  49. basic_tasks = [mineru_parse_async(session, file_path) for file_path in existing_files[:2]]
  50. # === Custom Options ===
  51. custom_options = {
  52. 'backend': 'pipeline', 'lang': 'ch', 'method': 'auto',
  53. 'formula_enable': True, 'table_enable': True,
  54. # Example for remote vlm server (vllm/sglang/lmdeploy...)
  55. # 'backend': 'vlm-http-client', 'server_url': 'http://127.0.0.1:30000',
  56. }
  57. custom_tasks = [mineru_parse_async(session, file_path, **custom_options) for file_path in existing_files[2:]]
  58. # Start all tasks
  59. all_tasks = basic_tasks + custom_tasks
  60. all_results = await asyncio.gather(*all_tasks)
  61. logger.info(f"All Results: {all_results}")
  62. logger.info("🎉 All processing completed!")
  63. if __name__ == '__main__':
  64. # Run the async main function
  65. asyncio.run(main())