How to use the ural.utils.safe_urlsplit function in ural

To help you get started, we’ve selected a few ural examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github medialab / ural / ural / facebook.py View on Github external
def is_facebook_link(url):
    """
    Tell whether the given url has a hostname containing '.facebook.' and a
    path that is exactly '/l.php'.

    Args:
        url: url to test, handed as-is to `safe_urlsplit`.

    Returns:
        bool: True when both conditions hold, False otherwise.
    """
    parsed = safe_urlsplit(url)
    host = parsed.hostname

    # A missing/empty hostname can never match; short-circuiting also keeps
    # the path from being inspected in that case.
    return bool(host) and '.facebook.' in host and parsed.path == '/l.php'
github medialab / ural / ural / google.py View on Github external
def is_amp_url(url):
    """
    Tell whether the given url looks like an AMP url, using hostname and
    path heuristics.

    Args:
        url: url to test, handed as-is to `safe_urlsplit`.

    Returns:
        True as soon as one of the heuristics below matches.
    """
    splitted = safe_urlsplit(url)

    # The hostname can be missing (the sibling link testers guard against a
    # falsy hostname too); fall back to an empty string so the hostname
    # heuristics are simply skipped instead of raising AttributeError.
    hostname = splitted.hostname or ''

    if hostname.endswith('.ampproject.org'):
        return True

    # `str.startswith` accepts a tuple of candidate prefixes.
    if hostname.startswith(('amp-', 'amp.')):
        return True

    if '/amp/' in splitted.path:
        return True

    if AMP_SUFFIXES_RE.search(splitted.path):
        return True
github medialab / ural / ural / facebook.py View on Github external
def parse_facebook_url(url, allow_relative_urls=False):
    """
    Parse a Facebook url into a typed object (only the `FacebookPost` case
    is visible in this excerpt).

    Args:
        url: url to parse.
        allow_relative_urls: when true, a url that starts with neither
            'http://' nor 'https://' and does not contain 'facebook.' is
            joined onto BASE_FACEBOOK_URL before parsing.

    Returns:
        None when the url is not accepted by `is_facebook_url`, or when its
        path is empty or '/'. A `FacebookPost` when the path contains
        '/posts/'.
    """

    # Allowing relative urls scraped from facebook?
    if (
        allow_relative_urls and
        not url.startswith('http://') and
        not url.startswith('https://') and
        'facebook.' not in url
    ):
        url = urljoin(BASE_FACEBOOK_URL, url)
    else:
        # Anything that was not resolved as a relative url must pass the
        # `is_facebook_url` test to be parsed at all.
        if not is_facebook_url(url):
            return None

    splitted = safe_urlsplit(url)

    # Nothing to extract from a url without a meaningful path
    if not splitted.path or splitted.path == '/':
        return None

    # Obvious post path
    if '/posts/' in splitted.path:
        # presumably a list of the path's segments - TODO confirm against
        # `urlpathsplit`
        parts = urlpathsplit(splitted.path)

        parent_id_or_handle = parts[0]

        # NOTE(review): `parts[2]` assumes at least three segments
        # (looks like <parent>/posts/<post id>) - confirm shorter paths
        # containing '/posts/' cannot reach this point.
        if NUMERIC_ID_RE.match(parent_id_or_handle):
            return FacebookPost(parts[2], parent_id=parent_id_or_handle)

        return FacebookPost(parts[2], parent_handle=parent_id_or_handle)

    # Ye olde permalink path
github medialab / ural / ural / google.py View on Github external
def is_google_link(url):
    """
    Tell whether the given url has a hostname containing 'google.' and a
    path that is exactly '/url'.

    Args:
        url: url to test, handed as-is to `safe_urlsplit`.

    Returns:
        bool: True when both conditions hold, False otherwise.
    """
    parsed = safe_urlsplit(url)
    host = parsed.hostname

    # Only a url with a matching hostname gets its path inspected.
    if host and 'google.' in host:
        return parsed.path == '/url'

    return False