|
|
@@ -43,7 +43,7 @@ from ....common.vlm.transformers import PretrainedModel
|
|
|
from ....common.vlm.transformers.model_outputs import (
|
|
|
BaseModelOutputWithPastAndCrossAttentions,
|
|
|
)
|
|
|
-from ._config import PPOCRVLConfig
|
|
|
+from ._config import PaddleOCRVLConfig
|
|
|
from ._distributed import (
|
|
|
AllGatherVarlenOp,
|
|
|
ColumnParallelLinear,
|
|
|
@@ -76,7 +76,7 @@ def calc_lm_head_logits(
|
|
|
handling sequence parallelism and tensor parallelism configurations.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration.
|
|
|
hidden_states (Tensor): Hidden states from the transformer layers
|
|
|
weight (Tensor): Weight matrix for the language model head
|
|
|
bias (Tensor): Bias vector for the language model head
|
|
|
@@ -263,7 +263,7 @@ class RMSNorm(nn.Layer):
|
|
|
Initialize RMSNorm layer.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration.
|
|
|
"""
|
|
|
super().__init__()
|
|
|
self.hidden_size = config.hidden_size
|
|
|
@@ -324,7 +324,7 @@ class LayerNorm(nn.LayerNorm):
|
|
|
Initialize LayerNorm with configuration.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration contains normalization parameters and flags.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration contains normalization parameters and flags.
|
|
|
"""
|
|
|
super().__init__(config.hidden_size, epsilon=config.rms_norm_eps)
|
|
|
self.config = config
|
|
|
@@ -334,7 +334,7 @@ class LayerNorm(nn.LayerNorm):
|
|
|
|
|
|
|
|
|
class KeyeRotaryEmbedding(nn.Layer):
|
|
|
- def __init__(self, config: PPOCRVLConfig, device=None):
|
|
|
+ def __init__(self, config: PaddleOCRVLConfig, device=None):
|
|
|
super().__init__()
|
|
|
self.rope_kwargs = {}
|
|
|
if config is None:
|
|
|
@@ -408,7 +408,7 @@ class Ernie4_5MLP(nn.Layer):
|
|
|
Initialize the MLP module with configuration options.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configurations.
|
|
|
+ config (PaddleOCRVLConfig): Model configurations.
|
|
|
layer_idx (int): Index of current layer (default: 0)
|
|
|
"""
|
|
|
super().__init__()
|
|
|
@@ -507,7 +507,7 @@ class Ernie4_5Attention(nn.Layer):
|
|
|
"""Initialize the attention layer.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration.
|
|
|
layer_idx (int, optional): Index in transformer stack. Defaults to 0.
|
|
|
"""
|
|
|
super().__init__()
|
|
|
@@ -1223,7 +1223,7 @@ class ErniePretrainingCriterion(paddle.nn.Layer):
|
|
|
"""Initialize the pretraining criterion.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration.
|
|
|
return_tuple (bool): Whether to return loss as tuple (loss, loss_sum). Defaults to True.
|
|
|
"""
|
|
|
super(ErniePretrainingCriterion, self).__init__()
|
|
|
@@ -1516,7 +1516,7 @@ class Ernie4_5LMHead(nn.Layer):
|
|
|
"""Initialize the language model head.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration containing:
|
|
|
+ config (PaddleOCRVLConfig): Model configuration containing:
|
|
|
- vocab_size: Size of vocabulary
|
|
|
- hidden_size: Dimension of hidden states
|
|
|
- tensor_parallel_degree: Degree of tensor parallelism
|
|
|
@@ -1632,7 +1632,7 @@ class Ernie4_5DecoderLayer(nn.Layer):
|
|
|
"""Initialize the decoder layer.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration.
|
|
|
layer_idx (int): Index of this layer in the transformer stack
|
|
|
"""
|
|
|
super().__init__()
|
|
|
@@ -1776,7 +1776,7 @@ class Ernie4_5DecoderLayer(nn.Layer):
|
|
|
class Ernie4_5PretrainedModel(PretrainedModel):
|
|
|
"""Base class for ERNIE pretrained models."""
|
|
|
|
|
|
- config_class = PPOCRVLConfig
|
|
|
+ config_class = PaddleOCRVLConfig
|
|
|
base_model_prefix = "ernie"
|
|
|
|
|
|
@classmethod
|
|
|
@@ -1784,7 +1784,7 @@ class Ernie4_5PretrainedModel(PretrainedModel):
|
|
|
"""Generate tensor parallel mappings for model conversion.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration.
|
|
|
is_split (bool): Whether to generate split mappings (True)
|
|
|
or merge mappings (False). Defaults to True.
|
|
|
|
|
|
@@ -2005,11 +2005,11 @@ class Ernie4_5PretrainedModel(PretrainedModel):
|
|
|
class Ernie4_5Model(Ernie4_5PretrainedModel):
|
|
|
"""The core ERNIE transformer model"""
|
|
|
|
|
|
- def __init__(self, config: PPOCRVLConfig):
|
|
|
+ def __init__(self, config: PaddleOCRVLConfig):
|
|
|
"""Initialize the ERNIE model architecture.
|
|
|
|
|
|
Args:
|
|
|
- config (PPOCRVLConfig): Model configuration.
|
|
|
+ config (PaddleOCRVLConfig): Model configuration.
|
|
|
"""
|
|
|
super().__init__(config)
|
|
|
self.padding_idx = config.pad_token_id
|