# configuration_mineru2.py
  1. from transformers import Qwen2Config
  2. class Mineru2QwenConfig(Qwen2Config):
  3. model_type = "mineru2_qwen"
  4. def __init__(
  5. self,
  6. ignore_index=-100,
  7. image_aspect_ratio="square_anyres_max_9",
  8. image_grid_pinpoints="(1x1),...,(4x4)",
  9. image_token_index=151646,
  10. mm_hidden_size=1152,
  11. mm_patch_merge_type="spatial_unpad",
  12. mm_projector_type="mlp2x_gelu",
  13. mm_vision_select_feature="full",
  14. mm_vision_select_layer=-2,
  15. mm_vision_tower="google/siglip-so400m-patch14-384",
  16. tie_word_embeddings=False,
  17. tokenizer_model_max_length=16384,
  18. tokenizer_padding_side="right",
  19. unfreeze_mm_vision_tower=True,
  20. **kwargs,
  21. ):
  22. self.ignore_index = ignore_index
  23. self.image_aspect_ratio = image_aspect_ratio
  24. self.image_grid_pinpoints = image_grid_pinpoints
  25. self.image_token_index = image_token_index
  26. self.mm_hidden_size = mm_hidden_size
  27. self.mm_patch_merge_type = mm_patch_merge_type
  28. self.mm_projector_type = mm_projector_type
  29. self.mm_vision_select_feature = mm_vision_select_feature
  30. self.mm_vision_select_layer = mm_vision_select_layer
  31. self.mm_vision_tower = mm_vision_tower
  32. self.tokenizer_model_max_length = tokenizer_model_max_length
  33. self.tokenizer_padding_side = tokenizer_padding_side
  34. self.unfreeze_mm_vision_tower = unfreeze_mm_vision_tower
  35. super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)