How to use the ural.patterns.PROTOCOL_RE.match function in ural

To help you get started, we’ve selected a few ural examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github medialab / ural / ural / normalize_url.py View on Github external
Defaults to True.

    Returns:
        string: The normalized url.

    """
    original_url_arg = url

    if infer_redirection:
        url = resolve(url)

    if isinstance(url, SplitResult):
        has_protocol = bool(splitted.scheme)
        splitted = url
    else:
        has_protocol = PROTOCOL_RE.match(url)

        # Ensuring scheme so parsing works correctly
        if not has_protocol:
            url = 'http://' + url

        # Parsing
        try:
            splitted = urlsplit(url)
        except ValueError:
            return original_url_arg

    scheme, netloc, path, query, fragment = splitted

    # Fixing common mistakes
    if fix_common_mistakes:
        if query:
github medialab / ural / ural / force_protocol.py View on Github external
def force_protocol(url, protocol='http'):
    """
    Return the url carrying the given protocol, whatever it carried before.

    Three cases are handled, in this order:
        1. PROTOCOL_RE does not match the url: the protocol followed by
           '://' is prepended.
        2. The url starts with '//': the protocol followed by ':' is
           prepended, keeping the existing slashes.
        3. Otherwise: what PROTOCOL_RE matches is substituted by the
           protocol followed by '://'.

    Args:
        url (str): Target URL as a string.
        protocol (str): Protocol to enforce. Trailing ':' and '/' characters
            are stripped before use. Defaults to 'http'.

    Returns:
        string: The url equipped with the requested protocol.

    """
    scheme = protocol.rstrip(':/')
    prefix = scheme + '://'

    # Nothing recognized as a protocol: just prepend ours
    if not PROTOCOL_RE.match(url):
        return prefix + url

    # Url already begins with the double slash, only the scheme is missing
    if url.startswith('//'):
        return scheme + ':' + url

    # A protocol was found: swap it for the requested one
    return PROTOCOL_RE.sub(prefix, url)
github medialab / ural / ural / ensure_protocol.py View on Github external
def ensure_protocol(url, protocol='http'):
    """
    Return the url with a protocol, adding the given one only when needed.

    A url matched by PROTOCOL_RE is returned untouched, unless it starts
    with '//', in which case the protocol followed by ':' is prepended.
    A url not matched by PROTOCOL_RE gets the protocol followed by '://'
    prepended.

    Args:
        url (str): Target URL as a string.
        protocol (str): Protocol to add when the url has none. Trailing ':'
            and '/' characters are stripped before use. Defaults to 'http'.

    Returns:
        string: The protocol-equipped url.

    """
    scheme = protocol.rstrip(':/')

    if PROTOCOL_RE.match(url):
        # Matched, but a leading '//' means the scheme itself is missing
        if url.startswith('//'):
            return scheme + ':' + url

        return url

    # Nothing recognized as a protocol: prepend the full prefix
    return scheme + '://' + url