tokenizer_config.json 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192
  1. {
  2. "added_tokens_decoder": {
  3. "151851": {
  4. "content": "<ref>",
  5. "lstrip": false,
  6. "normalized": false,
  7. "rstrip": false,
  8. "single_word": false,
  9. "special": true
  10. },
  11. "151852": {
  12. "content": "</ref>",
  13. "lstrip": false,
  14. "normalized": false,
  15. "rstrip": false,
  16. "single_word": false,
  17. "special": true
  18. },
  19. "151853": {
  20. "content": "<box>",
  21. "lstrip": false,
  22. "normalized": false,
  23. "rstrip": false,
  24. "single_word": false,
  25. "special": true
  26. },
  27. "151854": {
  28. "content": "</box>",
  29. "lstrip": false,
  30. "normalized": false,
  31. "rstrip": false,
  32. "single_word": false,
  33. "special": true
  34. },
  35. "151855": {
  36. "content": "<quad>",
  37. "lstrip": false,
  38. "normalized": false,
  39. "rstrip": false,
  40. "single_word": false,
  41. "special": true
  42. },
  43. "151856": {
  44. "content": "</quad>",
  45. "lstrip": false,
  46. "normalized": false,
  47. "rstrip": false,
  48. "single_word": false,
  49. "special": true
  50. },
  51. "151857": {
  52. "content": "<img>",
  53. "lstrip": false,
  54. "normalized": false,
  55. "rstrip": false,
  56. "single_word": false,
  57. "special": true
  58. },
  59. "151858": {
  60. "content": "</img>",
  61. "lstrip": false,
  62. "normalized": false,
  63. "rstrip": false,
  64. "single_word": false,
  65. "special": true
  66. },
  67. "151859": {
  68. "content": "<imgpad>",
  69. "lstrip": false,
  70. "normalized": false,
  71. "rstrip": false,
  72. "single_word": false,
  73. "special": true
  74. }
  75. },
  76. "additional_special_tokens": [
  77. "<ref>",
  78. "</ref>",
  79. "<box>",
  80. "</box>",
  81. "<quad>",
  82. "</quad>",
  83. "<img>",
  84. "</img>",
  85. "<imgpad>"
  86. ],
  87. "clean_up_tokenization_spaces": true,
  88. "model_max_length": 8000,
  89. "pad_token": "<|endoftext|>",
  90. "padding_side": "right",
  91. "tokenizer_class": "QWenTokenizer"
  92. }