How to use the ural.utils.SplitResult function in ural

To help you get started, we’ve selected a few ural examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github medialab / ural / ural / normalize_url.py View on Github external
def get_normalized_hostname(url, normalize_amp=True, strip_lang_subdomains=False,
                            infer_redirection=True):

    if infer_redirection:
        url = resolve(url)

    if isinstance(url, SplitResult):
        splitted = url
    else:
        try:
            splitted = urlsplit(ensure_protocol(url))
        except ValueError:
            return None

    if not splitted.hostname:
        return None

    hostname = splitted.hostname.lower()

    pattern = IRRELEVANT_SUBDOMAIN_AMP_RE if normalize_amp else IRRELEVANT_SUBDOMAIN_RE

    hostname = pattern.sub('', hostname)
github medialab / ural / ural / normalize_url.py View on Github external
# Dropping trailing slash
    if strip_trailing_slash and path.endswith('/'):
        path = path.rstrip('/')

    # Quoting or not
    if quoted:
        path = quote(path)
        query = quote(query, RESERVED_CHARACTERS)
        fragment = quote(fragment, SAFE_CHARACTERS)
    else:
        path = unquote(path)
        query = unquote(query)
        fragment = unquote(fragment)

    # Result
    result = SplitResult(
        scheme,
        netloc.lower(),
        path,
        query,
        fragment
    )

    if not unsplit:
        return result

    # TODO: check if works with `unsplit=False`
    if strip_protocol or not has_protocol:
        result = urlunsplit(result)[2:]
    else:
        result = urlunsplit(result)
github medialab / ural / ural / youtube.py View on Github external
fix_common_mistakes (bool, optional): Whether to fix common mistakes
            in Youtube urls as you can find them on the web. Defaults to `True`.

    """

    # Inferring redirection
    url = infer_redirection(url)

    # Continuation urls
    m = NEXT_V_RE.search(url) or NESTED_NEXT_V_RE.search(url)

    if m:
        return YoutubeVideo(id=m.group(1))

    # Parsing
    if isinstance(url, SplitResult):
        parsed = url
    else:
        url = ensure_protocol(url)
        parsed = urlsplit(url)

    if not is_youtube_url(parsed):
        return

    _, _, path, query, fragment = parsed

    # youtu.be
    if parsed.hostname.endswith('youtu.be'):

        if path.count('/') > 0:
            v = urlpathsplit(path)[0]
github medialab / ural / ural / normalize_url.py View on Github external
Defaults to True.
        infer_redirection (bool, optional): Whether to attempt resolving common
            redirects by leveraging well-known GET parameters. Defaults to `False`.
        quoted (bool, optional): Normalizing to quoted or unquoted.
            Defaults to True.

    Returns:
        string: The normalized url.

    """
    original_url_arg = url

    if infer_redirection:
        url = resolve(url)

    if isinstance(url, SplitResult):
        has_protocol = bool(splitted.scheme)
        splitted = url
    else:
        has_protocol = PROTOCOL_RE.match(url)

        # Ensuring scheme so parsing works correctly
        if not has_protocol:
            url = 'http://' + url

        # Parsing
        try:
            splitted = urlsplit(url)
        except ValueError:
            return original_url_arg

    scheme, netloc, path, query, fragment = splitted
github medialab / ural / ural / facebook.py View on Github external
def is_facebook_url(url):
    """
    Function returning whether the given url is a valid Facebook url.

    Args:
        url (str or SplitResult): Url to test, either as a raw string or as
            an already split url.

    Returns:
        bool: Whether given url is from Facebook.

    """
    if isinstance(url, SplitResult):
        hostname = url.hostname

        # A split url without a netloc (e.g. a relative path) exposes a
        # `None` hostname, which `re.search` would reject with a TypeError.
        # Such an url cannot belong to any domain.
        if hostname is None:
            return False

        return bool(re.search(FACEBOOK_DOMAIN_RE, hostname))

    return bool(re.match(FACEBOOK_URL_RE, url))
github medialab / ural / ural / youtube.py View on Github external
def is_youtube_url(url):
    """
    Function returning whether the given url is a valid Youtube url.

    Args:
        url (str or SplitResult): Url to test, either as a raw string or as
            an already split url.

    Returns:
        bool: Whether given url is from Youtube.

    """
    if isinstance(url, SplitResult):
        hostname = url.hostname

        # A split url without a netloc (e.g. a relative path) exposes a
        # `None` hostname, which `re.search` would reject with a TypeError.
        # Such an url cannot belong to any domain.
        if hostname is None:
            return False

        return bool(re.search(YOUTUBE_DOMAINS_RE, hostname))

    return bool(re.match(YOUTUBE_URL_RE, url))