| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747 |
- import re
- from ipaddress import (
- IPv4Address,
- IPv4Interface,
- IPv4Network,
- IPv6Address,
- IPv6Interface,
- IPv6Network,
- _BaseAddress,
- _BaseNetwork,
- )
- from typing import (
- TYPE_CHECKING,
- Any,
- Collection,
- Dict,
- Generator,
- List,
- Match,
- Optional,
- Pattern,
- Set,
- Tuple,
- Type,
- Union,
- cast,
- no_type_check,
- )
- from pydantic.v1 import errors
- from pydantic.v1.utils import Representation, update_not_none
- from pydantic.v1.validators import constr_length_validator, str_validator
- if TYPE_CHECKING:
- import email_validator
- from typing_extensions import TypedDict
- from pydantic.v1.config import BaseConfig
- from pydantic.v1.fields import ModelField
- from pydantic.v1.typing import AnyCallable
- CallableGenerator = Generator[AnyCallable, None, None]
- class Parts(TypedDict, total=False):
- scheme: str
- user: Optional[str]
- password: Optional[str]
- ipv4: Optional[str]
- ipv6: Optional[str]
- domain: Optional[str]
- port: Optional[str]
- path: Optional[str]
- query: Optional[str]
- fragment: Optional[str]
- class HostParts(TypedDict, total=False):
- host: str
- tld: Optional[str]
- host_type: Optional[str]
- port: Optional[str]
- rebuild: bool
- else:
- email_validator = None
- class Parts(dict):
- pass
- NetworkType = Union[str, bytes, int, Tuple[Union[str, bytes, int], Union[str, int]]]
- __all__ = [
- 'AnyUrl',
- 'AnyHttpUrl',
- 'FileUrl',
- 'HttpUrl',
- 'stricturl',
- 'EmailStr',
- 'NameEmail',
- 'IPvAnyAddress',
- 'IPvAnyInterface',
- 'IPvAnyNetwork',
- 'PostgresDsn',
- 'CockroachDsn',
- 'AmqpDsn',
- 'RedisDsn',
- 'MongoDsn',
- 'KafkaDsn',
- 'validate_email',
- ]
- _url_regex_cache = None
- _multi_host_url_regex_cache = None
- _ascii_domain_regex_cache = None
- _int_domain_regex_cache = None
- _host_regex_cache = None
- _host_regex = (
- r'(?:'
- r'(?P<ipv4>(?:\d{1,3}\.){3}\d{1,3})(?=$|[/:#?])|' # ipv4
- r'(?P<ipv6>\[[A-F0-9]*:[A-F0-9:]+\])(?=$|[/:#?])|' # ipv6
- r'(?P<domain>[^\s/:?#]+)' # domain, validation occurs later
- r')?'
- r'(?::(?P<port>\d+))?' # port
- )
- _scheme_regex = r'(?:(?P<scheme>[a-z][a-z0-9+\-.]+)://)?' # scheme https://tools.ietf.org/html/rfc3986#appendix-A
- _user_info_regex = r'(?:(?P<user>[^\s:/]*)(?::(?P<password>[^\s/]*))?@)?'
- _path_regex = r'(?P<path>/[^\s?#]*)?'
- _query_regex = r'(?:\?(?P<query>[^\s#]*))?'
- _fragment_regex = r'(?:#(?P<fragment>[^\s#]*))?'
- def url_regex() -> Pattern[str]:
- global _url_regex_cache
- if _url_regex_cache is None:
- _url_regex_cache = re.compile(
- rf'{_scheme_regex}{_user_info_regex}{_host_regex}{_path_regex}{_query_regex}{_fragment_regex}',
- re.IGNORECASE,
- )
- return _url_regex_cache
- def multi_host_url_regex() -> Pattern[str]:
- """
- Compiled multi host url regex.
- Additionally to `url_regex` it allows to match multiple hosts.
- E.g. host1.db.net,host2.db.net
- """
- global _multi_host_url_regex_cache
- if _multi_host_url_regex_cache is None:
- _multi_host_url_regex_cache = re.compile(
- rf'{_scheme_regex}{_user_info_regex}'
- r'(?P<hosts>([^/]*))' # validation occurs later
- rf'{_path_regex}{_query_regex}{_fragment_regex}',
- re.IGNORECASE,
- )
- return _multi_host_url_regex_cache
- def ascii_domain_regex() -> Pattern[str]:
- global _ascii_domain_regex_cache
- if _ascii_domain_regex_cache is None:
- ascii_chunk = r'[_0-9a-z](?:[-_0-9a-z]{0,61}[_0-9a-z])?'
- ascii_domain_ending = r'(?P<tld>\.[a-z]{2,63})?\.?'
- _ascii_domain_regex_cache = re.compile(
- fr'(?:{ascii_chunk}\.)*?{ascii_chunk}{ascii_domain_ending}', re.IGNORECASE
- )
- return _ascii_domain_regex_cache
- def int_domain_regex() -> Pattern[str]:
- global _int_domain_regex_cache
- if _int_domain_regex_cache is None:
- int_chunk = r'[_0-9a-\U00040000](?:[-_0-9a-\U00040000]{0,61}[_0-9a-\U00040000])?'
- int_domain_ending = r'(?P<tld>(\.[^\W\d_]{2,63})|(\.(?:xn--)[_0-9a-z-]{2,63}))?\.?'
- _int_domain_regex_cache = re.compile(fr'(?:{int_chunk}\.)*?{int_chunk}{int_domain_ending}', re.IGNORECASE)
- return _int_domain_regex_cache
- def host_regex() -> Pattern[str]:
- global _host_regex_cache
- if _host_regex_cache is None:
- _host_regex_cache = re.compile(
- _host_regex,
- re.IGNORECASE,
- )
- return _host_regex_cache
- class AnyUrl(str):
- strip_whitespace = True
- min_length = 1
- max_length = 2**16
- allowed_schemes: Optional[Collection[str]] = None
- tld_required: bool = False
- user_required: bool = False
- host_required: bool = True
- hidden_parts: Set[str] = set()
- __slots__ = ('scheme', 'user', 'password', 'host', 'tld', 'host_type', 'port', 'path', 'query', 'fragment')
- @no_type_check
- def __new__(cls, url: Optional[str], **kwargs) -> object:
- return str.__new__(cls, cls.build(**kwargs) if url is None else url)
- def __init__(
- self,
- url: str,
- *,
- scheme: str,
- user: Optional[str] = None,
- password: Optional[str] = None,
- host: Optional[str] = None,
- tld: Optional[str] = None,
- host_type: str = 'domain',
- port: Optional[str] = None,
- path: Optional[str] = None,
- query: Optional[str] = None,
- fragment: Optional[str] = None,
- ) -> None:
- str.__init__(url)
- self.scheme = scheme
- self.user = user
- self.password = password
- self.host = host
- self.tld = tld
- self.host_type = host_type
- self.port = port
- self.path = path
- self.query = query
- self.fragment = fragment
- @classmethod
- def build(
- cls,
- *,
- scheme: str,
- user: Optional[str] = None,
- password: Optional[str] = None,
- host: str,
- port: Optional[str] = None,
- path: Optional[str] = None,
- query: Optional[str] = None,
- fragment: Optional[str] = None,
- **_kwargs: str,
- ) -> str:
- parts = Parts(
- scheme=scheme,
- user=user,
- password=password,
- host=host,
- port=port,
- path=path,
- query=query,
- fragment=fragment,
- **_kwargs, # type: ignore[misc]
- )
- url = scheme + '://'
- if user:
- url += user
- if password:
- url += ':' + password
- if user or password:
- url += '@'
- url += host
- if port and ('port' not in cls.hidden_parts or cls.get_default_parts(parts).get('port') != port):
- url += ':' + port
- if path:
- url += path
- if query:
- url += '?' + query
- if fragment:
- url += '#' + fragment
- return url
- @classmethod
- def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
- update_not_none(field_schema, minLength=cls.min_length, maxLength=cls.max_length, format='uri')
- @classmethod
- def __get_validators__(cls) -> 'CallableGenerator':
- yield cls.validate
- @classmethod
- def validate(cls, value: Any, field: 'ModelField', config: 'BaseConfig') -> 'AnyUrl':
- if value.__class__ == cls:
- return value
- value = str_validator(value)
- if cls.strip_whitespace:
- value = value.strip()
- url: str = cast(str, constr_length_validator(value, field, config))
- m = cls._match_url(url)
- # the regex should always match, if it doesn't please report with details of the URL tried
- assert m, 'URL regex failed unexpectedly'
- original_parts = cast('Parts', m.groupdict())
- parts = cls.apply_default_parts(original_parts)
- parts = cls.validate_parts(parts)
- if m.end() != len(url):
- raise errors.UrlExtraError(extra=url[m.end() :])
- return cls._build_url(m, url, parts)
- @classmethod
- def _build_url(cls, m: Match[str], url: str, parts: 'Parts') -> 'AnyUrl':
- """
- Validate hosts and build the AnyUrl object. Split from `validate` so this method
- can be altered in `MultiHostDsn`.
- """
- host, tld, host_type, rebuild = cls.validate_host(parts)
- return cls(
- None if rebuild else url,
- scheme=parts['scheme'],
- user=parts['user'],
- password=parts['password'],
- host=host,
- tld=tld,
- host_type=host_type,
- port=parts['port'],
- path=parts['path'],
- query=parts['query'],
- fragment=parts['fragment'],
- )
- @staticmethod
- def _match_url(url: str) -> Optional[Match[str]]:
- return url_regex().match(url)
- @staticmethod
- def _validate_port(port: Optional[str]) -> None:
- if port is not None and int(port) > 65_535:
- raise errors.UrlPortError()
- @classmethod
- def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
- """
- A method used to validate parts of a URL.
- Could be overridden to set default values for parts if missing
- """
- scheme = parts['scheme']
- if scheme is None:
- raise errors.UrlSchemeError()
- if cls.allowed_schemes and scheme.lower() not in cls.allowed_schemes:
- raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))
- if validate_port:
- cls._validate_port(parts['port'])
- user = parts['user']
- if cls.user_required and user is None:
- raise errors.UrlUserInfoError()
- return parts
- @classmethod
- def validate_host(cls, parts: 'Parts') -> Tuple[str, Optional[str], str, bool]:
- tld, host_type, rebuild = None, None, False
- for f in ('domain', 'ipv4', 'ipv6'):
- host = parts[f] # type: ignore[literal-required]
- if host:
- host_type = f
- break
- if host is None:
- if cls.host_required:
- raise errors.UrlHostError()
- elif host_type == 'domain':
- is_international = False
- d = ascii_domain_regex().fullmatch(host)
- if d is None:
- d = int_domain_regex().fullmatch(host)
- if d is None:
- raise errors.UrlHostError()
- is_international = True
- tld = d.group('tld')
- if tld is None and not is_international:
- d = int_domain_regex().fullmatch(host)
- assert d is not None
- tld = d.group('tld')
- is_international = True
- if tld is not None:
- tld = tld[1:]
- elif cls.tld_required:
- raise errors.UrlHostTldError()
- if is_international:
- host_type = 'int_domain'
- rebuild = True
- host = host.encode('idna').decode('ascii')
- if tld is not None:
- tld = tld.encode('idna').decode('ascii')
- return host, tld, host_type, rebuild # type: ignore
- @staticmethod
- def get_default_parts(parts: 'Parts') -> 'Parts':
- return {}
- @classmethod
- def apply_default_parts(cls, parts: 'Parts') -> 'Parts':
- for key, value in cls.get_default_parts(parts).items():
- if not parts[key]: # type: ignore[literal-required]
- parts[key] = value # type: ignore[literal-required]
- return parts
- def __repr__(self) -> str:
- extra = ', '.join(f'{n}={getattr(self, n)!r}' for n in self.__slots__ if getattr(self, n) is not None)
- return f'{self.__class__.__name__}({super().__repr__()}, {extra})'
- class AnyHttpUrl(AnyUrl):
- allowed_schemes = {'http', 'https'}
- __slots__ = ()
- class HttpUrl(AnyHttpUrl):
- tld_required = True
- # https://stackoverflow.com/questions/417142/what-is-the-maximum-length-of-a-url-in-different-browsers
- max_length = 2083
- hidden_parts = {'port'}
- @staticmethod
- def get_default_parts(parts: 'Parts') -> 'Parts':
- return {'port': '80' if parts['scheme'] == 'http' else '443'}
- class FileUrl(AnyUrl):
- allowed_schemes = {'file'}
- host_required = False
- __slots__ = ()
- class MultiHostDsn(AnyUrl):
- __slots__ = AnyUrl.__slots__ + ('hosts',)
- def __init__(self, *args: Any, hosts: Optional[List['HostParts']] = None, **kwargs: Any):
- super().__init__(*args, **kwargs)
- self.hosts = hosts
- @staticmethod
- def _match_url(url: str) -> Optional[Match[str]]:
- return multi_host_url_regex().match(url)
- @classmethod
- def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
- return super().validate_parts(parts, validate_port=False)
- @classmethod
- def _build_url(cls, m: Match[str], url: str, parts: 'Parts') -> 'MultiHostDsn':
- hosts_parts: List['HostParts'] = []
- host_re = host_regex()
- for host in m.groupdict()['hosts'].split(','):
- d: Parts = host_re.match(host).groupdict() # type: ignore
- host, tld, host_type, rebuild = cls.validate_host(d)
- port = d.get('port')
- cls._validate_port(port)
- hosts_parts.append(
- {
- 'host': host,
- 'host_type': host_type,
- 'tld': tld,
- 'rebuild': rebuild,
- 'port': port,
- }
- )
- if len(hosts_parts) > 1:
- return cls(
- None if any([hp['rebuild'] for hp in hosts_parts]) else url,
- scheme=parts['scheme'],
- user=parts['user'],
- password=parts['password'],
- path=parts['path'],
- query=parts['query'],
- fragment=parts['fragment'],
- host_type=None,
- hosts=hosts_parts,
- )
- else:
- # backwards compatibility with single host
- host_part = hosts_parts[0]
- return cls(
- None if host_part['rebuild'] else url,
- scheme=parts['scheme'],
- user=parts['user'],
- password=parts['password'],
- host=host_part['host'],
- tld=host_part['tld'],
- host_type=host_part['host_type'],
- port=host_part.get('port'),
- path=parts['path'],
- query=parts['query'],
- fragment=parts['fragment'],
- )
- class PostgresDsn(MultiHostDsn):
- allowed_schemes = {
- 'postgres',
- 'postgresql',
- 'postgresql+asyncpg',
- 'postgresql+pg8000',
- 'postgresql+psycopg',
- 'postgresql+psycopg2',
- 'postgresql+psycopg2cffi',
- 'postgresql+py-postgresql',
- 'postgresql+pygresql',
- }
- user_required = True
- __slots__ = ()
- class CockroachDsn(AnyUrl):
- allowed_schemes = {
- 'cockroachdb',
- 'cockroachdb+psycopg2',
- 'cockroachdb+asyncpg',
- }
- user_required = True
- class AmqpDsn(AnyUrl):
- allowed_schemes = {'amqp', 'amqps'}
- host_required = False
- class RedisDsn(AnyUrl):
- __slots__ = ()
- allowed_schemes = {'redis', 'rediss'}
- host_required = False
- @staticmethod
- def get_default_parts(parts: 'Parts') -> 'Parts':
- return {
- 'domain': 'localhost' if not (parts['ipv4'] or parts['ipv6']) else '',
- 'port': '6379',
- 'path': '/0',
- }
- class MongoDsn(AnyUrl):
- allowed_schemes = {'mongodb'}
- # TODO: Needed to generic "Parts" for "Replica Set", "Sharded Cluster", and other mongodb deployment modes
- @staticmethod
- def get_default_parts(parts: 'Parts') -> 'Parts':
- return {
- 'port': '27017',
- }
- class KafkaDsn(AnyUrl):
- allowed_schemes = {'kafka'}
- @staticmethod
- def get_default_parts(parts: 'Parts') -> 'Parts':
- return {
- 'domain': 'localhost',
- 'port': '9092',
- }
- def stricturl(
- *,
- strip_whitespace: bool = True,
- min_length: int = 1,
- max_length: int = 2**16,
- tld_required: bool = True,
- host_required: bool = True,
- allowed_schemes: Optional[Collection[str]] = None,
- ) -> Type[AnyUrl]:
- # use kwargs then define conf in a dict to aid with IDE type hinting
- namespace = dict(
- strip_whitespace=strip_whitespace,
- min_length=min_length,
- max_length=max_length,
- tld_required=tld_required,
- host_required=host_required,
- allowed_schemes=allowed_schemes,
- )
- return type('UrlValue', (AnyUrl,), namespace)
- def import_email_validator() -> None:
- global email_validator
- try:
- import email_validator
- except ImportError as e:
- raise ImportError('email-validator is not installed, run `pip install pydantic[email]`') from e
- class EmailStr(str):
- @classmethod
- def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
- field_schema.update(type='string', format='email')
- @classmethod
- def __get_validators__(cls) -> 'CallableGenerator':
- # included here and below so the error happens straight away
- import_email_validator()
- yield str_validator
- yield cls.validate
- @classmethod
- def validate(cls, value: Union[str]) -> str:
- return validate_email(value)[1]
- class NameEmail(Representation):
- __slots__ = 'name', 'email'
- def __init__(self, name: str, email: str):
- self.name = name
- self.email = email
- def __eq__(self, other: Any) -> bool:
- return isinstance(other, NameEmail) and (self.name, self.email) == (other.name, other.email)
- @classmethod
- def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
- field_schema.update(type='string', format='name-email')
- @classmethod
- def __get_validators__(cls) -> 'CallableGenerator':
- import_email_validator()
- yield cls.validate
- @classmethod
- def validate(cls, value: Any) -> 'NameEmail':
- if value.__class__ == cls:
- return value
- value = str_validator(value)
- return cls(*validate_email(value))
- def __str__(self) -> str:
- return f'{self.name} <{self.email}>'
- class IPvAnyAddress(_BaseAddress):
- __slots__ = ()
- @classmethod
- def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
- field_schema.update(type='string', format='ipvanyaddress')
- @classmethod
- def __get_validators__(cls) -> 'CallableGenerator':
- yield cls.validate
- @classmethod
- def validate(cls, value: Union[str, bytes, int]) -> Union[IPv4Address, IPv6Address]:
- try:
- return IPv4Address(value)
- except ValueError:
- pass
- try:
- return IPv6Address(value)
- except ValueError:
- raise errors.IPvAnyAddressError()
- class IPvAnyInterface(_BaseAddress):
- __slots__ = ()
- @classmethod
- def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
- field_schema.update(type='string', format='ipvanyinterface')
- @classmethod
- def __get_validators__(cls) -> 'CallableGenerator':
- yield cls.validate
- @classmethod
- def validate(cls, value: NetworkType) -> Union[IPv4Interface, IPv6Interface]:
- try:
- return IPv4Interface(value)
- except ValueError:
- pass
- try:
- return IPv6Interface(value)
- except ValueError:
- raise errors.IPvAnyInterfaceError()
- class IPvAnyNetwork(_BaseNetwork): # type: ignore
- @classmethod
- def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
- field_schema.update(type='string', format='ipvanynetwork')
- @classmethod
- def __get_validators__(cls) -> 'CallableGenerator':
- yield cls.validate
- @classmethod
- def validate(cls, value: NetworkType) -> Union[IPv4Network, IPv6Network]:
- # Assume IP Network is defined with a default value for ``strict`` argument.
- # Define your own class if you want to specify network address check strictness.
- try:
- return IPv4Network(value)
- except ValueError:
- pass
- try:
- return IPv6Network(value)
- except ValueError:
- raise errors.IPvAnyNetworkError()
- pretty_email_regex = re.compile(r'([\w ]*?) *<(.*)> *')
- MAX_EMAIL_LENGTH = 2048
- """Maximum length for an email.
- A somewhat arbitrary but very generous number compared to what is allowed by most implementations.
- """
- def validate_email(value: Union[str]) -> Tuple[str, str]:
- """
- Email address validation using https://pypi.org/project/email-validator/
- Notes:
- * raw ip address (literal) domain parts are not allowed.
- * "John Doe <local_part@domain.com>" style "pretty" email addresses are processed
- * spaces are striped from the beginning and end of addresses but no error is raised
- """
- if email_validator is None:
- import_email_validator()
- if len(value) > MAX_EMAIL_LENGTH:
- raise errors.EmailError()
- m = pretty_email_regex.fullmatch(value)
- name: Union[str, None] = None
- if m:
- name, value = m.groups()
- email = value.strip()
- try:
- parts = email_validator.validate_email(email, check_deliverability=False)
- except email_validator.EmailNotValidError as e:
- raise errors.EmailError from e
- if hasattr(parts, 'normalized'):
- # email-validator >= 2
- email = parts.normalized
- assert email is not None
- name = name or parts.local_part
- return name, email
- else:
- # email-validator >1, <2
- at_index = email.index('@')
- local_part = email[:at_index] # RFC 5321, local part must be case-sensitive.
- global_part = email[at_index:].lower()
- return name or local_part, local_part + global_part
|