```python
from transformers import Qwen2Config


class Mineru2QwenConfig(Qwen2Config):
    model_type = "mineru2_qwen"

    def __init__(
        self,
        ignore_index=-100,
        image_aspect_ratio="square_anyres_max_9",
        image_grid_pinpoints="(1x1),...,(4x4)",
        image_token_index=151646,
        mm_hidden_size=1152,
        mm_patch_merge_type="spatial_unpad",
        mm_projector_type="mlp2x_gelu",
        mm_vision_select_feature="full",
        mm_vision_select_layer=-2,
        mm_vision_tower="google/siglip-so400m-patch14-384",
        tie_word_embeddings=False,
        tokenizer_model_max_length=16384,
        tokenizer_padding_side="right",
        unfreeze_mm_vision_tower=True,
        **kwargs,
    ):
        self.ignore_index = ignore_index
        self.image_aspect_ratio = image_aspect_ratio
        self.image_grid_pinpoints = image_grid_pinpoints
        self.image_token_index = image_token_index
        self.mm_hidden_size = mm_hidden_size
        self.mm_patch_merge_type = mm_patch_merge_type
        self.mm_projector_type = mm_projector_type
        self.mm_vision_select_feature = mm_vision_select_feature
        self.mm_vision_select_layer = mm_vision_select_layer
        self.mm_vision_tower = mm_vision_tower
        self.tokenizer_model_max_length = tokenizer_model_max_length
        self.tokenizer_padding_side = tokenizer_padding_side
        self.unfreeze_mm_vision_tower = unfreeze_mm_vision_tower
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
```
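Since the class subclasses `Qwen2Config`, it behaves like any `transformers` config: the multimodal fields are stored on the instance before the Qwen2 defaults are filled in by `super().__init__`. A minimal usage sketch follows; the `AutoConfig.register` call is the standard `transformers` pattern for custom `model_type` strings, and whether the MinerU2 code actually registers it this way is an assumption here.

```python
from transformers import AutoConfig

# Assumption: register the custom config so AutoConfig can resolve the
# "mineru2_qwen" model_type when loading a checkpoint with trust_remote_code
# disabled and the class available locally.
AutoConfig.register("mineru2_qwen", Mineru2QwenConfig)

# Instantiate with defaults and override a field, as with any
# PretrainedConfig subclass (illustrative values only).
config = Mineru2QwenConfig(tokenizer_model_max_length=8192)
print(config.model_type)        # "mineru2_qwen"
print(config.mm_vision_tower)   # "google/siglip-so400m-patch14-384"
```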