ソースを参照

fix PaddleOCR-VL name (#4610)

zhang-prog 1ヶ月前
コミット
bd76f95977

+ 6 - 6
paddlex/inference/genai/models/paddleocr_vl_09b/__init__.py

@@ -15,21 +15,21 @@
 
 def get_network_class(backend):
     if backend == "vllm":
-        from ._vllm import PPOCRVLForConditionalGeneration
+        from ._vllm import PaddleOCRVLForConditionalGeneration
 
-        return PPOCRVLForConditionalGeneration
+        return PaddleOCRVLForConditionalGeneration
     elif backend == "sglang":
-        from ._sglang import PPOCRVLForConditionalGeneration
+        from ._sglang import PaddleOCRVLForConditionalGeneration
 
-        return PPOCRVLForConditionalGeneration
+        return PaddleOCRVLForConditionalGeneration
     else:
         raise ValueError(f"Unsupported backend: {backend}")
 
 
 def get_processor_class(backend):
     if backend == "sglang":
-        from ._sglang import PPOCRVLImageProcessor
+        from ._sglang import PaddleOCRVLImageProcessor
 
-        return PPOCRVLImageProcessor
+        return PaddleOCRVLImageProcessor
     else:
         raise ValueError(f"Unsupported backend: {backend}")

+ 2 - 2
paddlex/inference/genai/models/paddleocr_vl_09b/_sglang/__init__.py

@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .network import PPOCRVLForConditionalGeneration
-from .processor import PPOCRVLImageProcessor
+from .network import PaddleOCRVLForConditionalGeneration
+from .processor import PaddleOCRVLImageProcessor

+ 2 - 2
paddlex/inference/genai/models/paddleocr_vl_09b/_sglang/network.py

@@ -626,7 +626,7 @@ if all(
             return sample_hidden_state
 
     class SiglipVisionModel(nn.Module):
-        config_class = "PPOCRVisionConfig"
+        config_class = "PaddleOCRVisionConfig"
         main_input_name = "pixel_values"
 
         def __init__(
@@ -679,7 +679,7 @@ if all(
                 cu_seqlens=cu_seqlens,
             )
 
-    class PPOCRVLForConditionalGeneration(Ernie4_5_ForCausalLM):
+    class PaddleOCRVLForConditionalGeneration(Ernie4_5_ForCausalLM):
 
         def __init__(self, *, config, quant_config=None, prefix: str = ""):
             super().__init__(config=config, prefix=prefix)

+ 1 - 1
paddlex/inference/genai/models/paddleocr_vl_09b/_sglang/processor.py

@@ -87,7 +87,7 @@ if all(map(is_dep_available, ("sglang", "torch"))):
     async def resize_image_async(image, min_pixels, max_pixels, factor):
         return resize_image(image, min_pixels, max_pixels, factor)
 
-    class PPOCRVLImageProcessor(BaseMultimodalProcessor):
+    class PaddleOCRVLImageProcessor(BaseMultimodalProcessor):
 
         def __init__(self, hf_config, server_args, _processor, *args, **kwargs):
             super().__init__(hf_config, server_args, _processor, *args, **kwargs)

+ 12 - 8
paddlex/inference/genai/models/paddleocr_vl_09b/_vllm.py

@@ -136,7 +136,7 @@ if all(
             w_bar = math.ceil(width * beta / factor) * factor
         return h_bar, w_bar
 
-    class PPOCRVLProcessingInfo(BaseProcessingInfo):
+    class PaddleOCRVLProcessingInfo(BaseProcessingInfo):
 
         def get_hf_config(self):
             return self.ctx.get_hf_config()
@@ -194,7 +194,9 @@ if all(
             image_size = hf_config.vision_config.image_size
             return ImageSize(height=image_size, width=image_size)
 
-    class PPOCRVLDummyInputsBuilder(BaseDummyInputsBuilder[PPOCRVLProcessingInfo]):
+    class PaddleOCRVLDummyInputsBuilder(
+        BaseDummyInputsBuilder[PaddleOCRVLProcessingInfo]
+    ):
 
         def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
             num_images = mm_counts.get("image", 0)
@@ -221,7 +223,9 @@ if all(
                 )
             }
 
-    class PPOCRVLMultiModalProcessor(BaseMultiModalProcessor[PPOCRVLProcessingInfo]):
+    class PaddleOCRVLMultiModalProcessor(
+        BaseMultiModalProcessor[PaddleOCRVLProcessingInfo]
+    ):
 
         def _call_hf_processor(
             self,
@@ -955,7 +959,7 @@ if all(
             return sample_hidden_state
 
     class SiglipVisionModel(nn.Module):
-        config_class = "PPOCRVisionConfig"
+        config_class = "PaddleOCRVisionConfig"
         main_input_name = "pixel_values"
 
         def __init__(
@@ -1073,9 +1077,9 @@ if all(
             return loaded_params
 
     @MULTIMODAL_REGISTRY.register_processor(
-        PPOCRVLMultiModalProcessor,
-        info=PPOCRVLProcessingInfo,
-        dummy_inputs=PPOCRVLDummyInputsBuilder,
+        PaddleOCRVLMultiModalProcessor,
+        info=PaddleOCRVLProcessingInfo,
+        dummy_inputs=PaddleOCRVLDummyInputsBuilder,
     )
     @support_torch_compile(
         # set dynamic_arg_dims to support mrope
@@ -1086,7 +1090,7 @@ if all(
             "inputs_embeds": 0,
         }
     )
-    class PPOCRVLForConditionalGeneration(Ernie4_5_ForCausalLM, SupportsMultiModal):
+    class PaddleOCRVLForConditionalGeneration(Ernie4_5_ForCausalLM, SupportsMultiModal):
 
         def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
             super().__init__(vllm_config=vllm_config, prefix=prefix)