How to use the ural.utils.unquote function in ural

To help you get started, we’ve selected a few examples showing how ural is commonly used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github medialab / ural / ural / normalize_url.py View on Github external
if normalize_amp and netloc.startswith('amp-'):
        netloc = netloc[4:]

    # Dropping trailing slash
    if strip_trailing_slash and path.endswith('/'):
        path = path.rstrip('/')

    # Quoting or not
    if quoted:
        path = quote(path)
        query = quote(query, RESERVED_CHARACTERS)
        fragment = quote(fragment, SAFE_CHARACTERS)
    else:
        path = unquote(path)
        query = unquote(query)
        fragment = unquote(fragment)

    # Result
    result = SplitResult(
        scheme,
        netloc.lower(),
        path,
        query,
        fragment
    )

    if not unsplit:
        return result

    # TODO: check if works with `unsplit=False`
    if strip_protocol or not has_protocol:
        result = urlunsplit(result)[2:]
github medialab / ural / ural / infer_redirection.py View on Github external
Args:
        url (string): Target url.

    Returns:
        string: Redirected url or the original url if nothing was found.
    """

    redirection_split = REDIRECTION_DOMAINS_RE.split(url, 1)

    if len(redirection_split) > 1:
        return infer_redirection('https://' + redirection_split[1])

    obvious_redirect_match = re.search(OBVIOUS_REDIRECTS_RE, url)

    if obvious_redirect_match is not None:
        target = unquote(obvious_redirect_match.group(1))

        if target.startswith('http://') or target.startswith('https://'):
            return target

        if target.startswith('/'):
            return urljoin(url, target)

    return url
github medialab / ural / ural / facebook.py View on Github external
def extract_url_from_facebook_link(url):
    """Extract the url embedded in a Facebook link.

    Args:
        url (string): Link to search with `URL_EXTRACT_RE`.

    Returns:
        string: First captured group of the match, passed through
            `unquote`, or None if the pattern does not match.
    """
    match = URL_EXTRACT_RE.search(url)

    # Happy path first: a hit means group 1 holds the embedded target.
    if match is not None:
        embedded = match.group(1)
        return unquote(embedded)

    return None
github medialab / ural / ural / normalize_url.py View on Github external
# Normalizing AMP subdomains
    if normalize_amp and netloc.startswith('amp-'):
        netloc = netloc[4:]

    # Dropping trailing slash
    if strip_trailing_slash and path.endswith('/'):
        path = path.rstrip('/')

    # Quoting or not
    if quoted:
        path = quote(path)
        query = quote(query, RESERVED_CHARACTERS)
        fragment = quote(fragment, SAFE_CHARACTERS)
    else:
        path = unquote(path)
        query = unquote(query)
        fragment = unquote(fragment)

    # Result
    result = SplitResult(
        scheme,
        netloc.lower(),
        path,
        query,
        fragment
    )

    if not unsplit:
        return result

    # TODO: check if works with `unsplit=False`
    if strip_protocol or not has_protocol:
github medialab / ural / ural / normalize_url.py View on Github external
# Normalizing AMP subdomains
    if normalize_amp and netloc.startswith('amp-'):
        netloc = netloc[4:]

    # Dropping trailing slash
    if strip_trailing_slash and path.endswith('/'):
        path = path.rstrip('/')

    # Quoting or not
    if quoted:
        path = quote(path)
        query = quote(query, RESERVED_CHARACTERS)
        fragment = quote(fragment, SAFE_CHARACTERS)
    else:
        path = unquote(path)
        query = unquote(query)
        fragment = unquote(fragment)

    # Result
    result = SplitResult(
        scheme,
        netloc.lower(),
        path,
        query,
        fragment
    )

    if not unsplit:
        return result

    # TODO: check if works with `unsplit=False`
github medialab / ural / ural / google.py View on Github external
def extract_url_from_google_link(url):
    """Extract the url embedded in a Google link.

    Args:
        url (string): Link to search with `URL_EXTRACT_RE`.

    Returns:
        string: First captured group of the match, passed through
            `unquote`, or None if the pattern does not match.
    """
    found = URL_EXTRACT_RE.search(url)

    # No match means there is nothing to extract.
    return None if found is None else unquote(found.group(1))