浏览代码

fix PaddleOCR-VL name - local (#4617)

* fix PaddleOCR-VL name - local

* fix
zhang-prog 1 月之前
父节点
当前提交
e1664fe334

+ 2 - 2
paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_config.py

@@ -29,7 +29,7 @@
 from ....common.vlm.transformers import PretrainedConfig
 
 
-class PPOCRVisionConfig(PretrainedConfig):
+class PaddleOCRVisionConfig(PretrainedConfig):
     model_type = "paddleocr_vl"
     base_config_key = "vision_config"
 
@@ -70,7 +70,7 @@ class PPOCRVisionConfig(PretrainedConfig):
 class PaddleOCRVLConfig(PretrainedConfig):
     model_type = "paddleocr_vl"
     keys_to_ignore_at_inference = ["past_key_values"]
-    sub_configs = {"vision_config": PPOCRVisionConfig}
+    sub_configs = {"vision_config": PaddleOCRVisionConfig}
 
     base_model_tp_plan = {
         "layers.*.self_attn.q_proj": "colwise",

+ 5 - 5
paddlex/inference/models/doc_vlm/modeling/paddleocr_vl/_siglip.py

@@ -48,7 +48,7 @@ from ....common.vlm.transformers.model_outputs import (
     BaseModelOutput,
     BaseModelOutputWithPooling,
 )
-from ._config import PaddleOCRVLConfig, PPOCRVisionConfig
+from ._config import PaddleOCRVisionConfig, PaddleOCRVLConfig
 
 
 def rotate_half(x):
@@ -617,7 +617,7 @@ class SiglipEncoder(nn.Layer):
 class SiglipMultiheadAttentionPoolingHead(nn.Layer):
     """Multihead Attention Pooling."""
 
-    def __init__(self, config: PPOCRVisionConfig):
+    def __init__(self, config: PaddleOCRVisionConfig):
         super().__init__()
 
         self.probe = self.create_parameter(
@@ -646,7 +646,7 @@ class SiglipMultiheadAttentionPoolingHead(nn.Layer):
 
 
 class SiglipVisionTransformer(nn.Layer):
-    def __init__(self, config: PPOCRVisionConfig):
+    def __init__(self, config: PaddleOCRVisionConfig):
         super().__init__()
         self.config = config
         embed_dim = config.hidden_size
@@ -816,10 +816,10 @@ class SiglipPreTrainedModel(PretrainedModel):
 
 
 class SiglipVisionModel(SiglipPreTrainedModel):
-    config_class = PPOCRVisionConfig
+    config_class = PaddleOCRVisionConfig
     main_input_name = "pixel_values"
 
-    def __init__(self, config: PPOCRVisionConfig):
+    def __init__(self, config: PaddleOCRVisionConfig):
         super().__init__(config)
 
         self.vision_model = SiglipVisionTransformer(config)