|
@@ -18,11 +18,13 @@ import unicodedata
|
|
|
from functools import lru_cache
|
|
from functools import lru_cache
|
|
|
from typing import List, Optional, Tuple
|
|
from typing import List, Optional, Tuple
|
|
|
|
|
|
|
|
-import regex as re
|
|
|
|
|
-
|
|
|
|
|
|
|
+from .....utils.deps import is_dep_available
|
|
|
from .tokenizer_utils import PretrainedTokenizer
|
|
from .tokenizer_utils import PretrainedTokenizer
|
|
|
from .tokenizer_utils_base import AddedToken, TextInput
|
|
from .tokenizer_utils_base import AddedToken, TextInput
|
|
|
|
|
|
|
|
|
|
+if is_dep_available("regex"):
|
|
|
|
|
+ import regex as re
|
|
|
|
|
+
|
|
|
VOCAB_FILES_NAMES = {
|
|
VOCAB_FILES_NAMES = {
|
|
|
"vocab_file": "vocab.json",
|
|
"vocab_file": "vocab.json",
|
|
|
"merges_file": "merges.txt",
|
|
"merges_file": "merges.txt",
|