
Merge pull request #3756 from myhloli/dev

Set OMP_NUM_THREADS environment variable to 1 for vllm backend initialization
Xiaomeng Zhao 1 month ago
parent
commit
1b724f3336

+ 31 - 29
mineru/backend/vlm/vlm_analyze.py

@@ -87,35 +87,37 @@ class ModelSingleton:
                     except Exception as e:
                         logger.warning(f'Error determining VRAM: {e}, using default batch_ratio: 1')
                         batch_size = 1
-                elif backend == "vllm-engine":
-                    try:
-                        import vllm
-                        from mineru_vl_utils import MinerULogitsProcessor
-                    except ImportError:
-                        raise ImportError("Please install vllm to use the vllm-engine backend.")
-                    if "gpu_memory_utilization" not in kwargs:
-                        kwargs["gpu_memory_utilization"] = 0.5
-                    if "model" not in kwargs:
-                        kwargs["model"] = model_path
-                    if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
-                        kwargs["logits_processors"] = [MinerULogitsProcessor]
-                    # Use kwargs as the vllm initialization parameters
-                    vllm_llm = vllm.LLM(**kwargs)
-                elif backend == "vllm-async-engine":
-                    try:
-                        from vllm.engine.arg_utils import AsyncEngineArgs
-                        from vllm.v1.engine.async_llm import AsyncLLM
-                        from mineru_vl_utils import MinerULogitsProcessor
-                    except ImportError:
-                        raise ImportError("Please install vllm to use the vllm-async-engine backend.")
-                    if "gpu_memory_utilization" not in kwargs:
-                        kwargs["gpu_memory_utilization"] = 0.5
-                    if "model" not in kwargs:
-                        kwargs["model"] = model_path
-                    if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
-                        kwargs["logits_processors"] = [MinerULogitsProcessor]
-                    # Use kwargs as the vllm initialization parameters
-                    vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
+                else:
+                    os.environ["OMP_NUM_THREADS"] = "1"
+                    if backend == "vllm-engine":
+                        try:
+                            import vllm
+                            from mineru_vl_utils import MinerULogitsProcessor
+                        except ImportError:
+                            raise ImportError("Please install vllm to use the vllm-engine backend.")
+                        if "gpu_memory_utilization" not in kwargs:
+                            kwargs["gpu_memory_utilization"] = 0.5
+                        if "model" not in kwargs:
+                            kwargs["model"] = model_path
+                        if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
+                            kwargs["logits_processors"] = [MinerULogitsProcessor]
+                        # Use kwargs as the vllm initialization parameters
+                        vllm_llm = vllm.LLM(**kwargs)
+                    elif backend == "vllm-async-engine":
+                        try:
+                            from vllm.engine.arg_utils import AsyncEngineArgs
+                            from vllm.v1.engine.async_llm import AsyncLLM
+                            from mineru_vl_utils import MinerULogitsProcessor
+                        except ImportError:
+                            raise ImportError("Please install vllm to use the vllm-async-engine backend.")
+                        if "gpu_memory_utilization" not in kwargs:
+                            kwargs["gpu_memory_utilization"] = 0.5
+                        if "model" not in kwargs:
+                            kwargs["model"] = model_path
+                        if enable_custom_logits_processors() and ("logits_processors" not in kwargs):
+                            kwargs["logits_processors"] = [MinerULogitsProcessor]
+                        # Use kwargs as the vllm initialization parameters
+                        vllm_async_llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**kwargs))
             self._models[key] = MinerUClient(
                 backend=backend,
                 model=model,

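OMP_NUM_THREADS is only honored by OpenMP-backed libraries if it is set before they initialize their thread pools, which is why the new code exports it ahead of constructing either vllm engine. A minimal sketch of that ordering constraint, assuming PyTorch is available purely to observe the effective thread count (torch is not part of this change):

import os

# Must be set before OpenMP-backed libraries spin up their thread pools;
# setting it after they have initialized generally has no effect.
os.environ["OMP_NUM_THREADS"] = "1"

import torch  # torch reads OMP_NUM_THREADS during initialization

print(torch.get_num_threads())  # expected to print 1
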
+ 3 - 0
mineru/model/vlm_vllm_model/server.py

@@ -1,3 +1,4 @@
+import os
 import sys
 
 from mineru.backend.vlm.custom_logits_processors import enable_custom_logits_processors
@@ -51,6 +52,8 @@ def main():
     # Rebuild the arguments, passing the model path as a positional argument
     sys.argv = [sys.argv[0]] + ["serve", model_path] + args
 
+    os.environ["OMP_NUM_THREADS"] = "1"
+
     # Start the vllm server
     print(f"start vllm server: {sys.argv}")
     vllm_main()
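
Because the variable is written into os.environ in the server process before vllm_main() runs, it is also inherited by any worker processes spawned afterwards. A small self-contained sketch of that inheritance, using subprocess as a stand-in for a worker (not part of this change):

import os
import subprocess
import sys

# Environment changes made in the parent before a child is spawned are
# inherited by that child, which is how the setting reaches worker processes.
os.environ["OMP_NUM_THREADS"] = "1"

subprocess.run(
    [sys.executable, "-c", "import os; print(os.environ['OMP_NUM_THREADS'])"],
    check=True,
)  # prints: 1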