How to use the ural.utils.urljoin function in ural

To help you get started, we’ve selected a few ural examples that show popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: medialab/ural, file ural/infer_redirection.py (view on GitHub, external link)
redirection_split = REDIRECTION_DOMAINS_RE.split(url, 1)

    if len(redirection_split) > 1:
        return infer_redirection('https://' + redirection_split[1])

    obvious_redirect_match = re.search(OBVIOUS_REDIRECTS_RE, url)

    if obvious_redirect_match is not None:
        target = unquote(obvious_redirect_match.group(1))

        if target.startswith('http://') or target.startswith('https://'):
            return target

        if target.startswith('/'):
            return urljoin(url, target)

    return url
GitHub: medialab/ural, file ural/facebook.py (view on GitHub, external link)
def parse_facebook_url(url, allow_relative_urls=False):

    # Allowing relative urls scraped from facebook?
    if (
        allow_relative_urls and
        not url.startswith('http://') and
        not url.startswith('https://') and
        'facebook.' not in url
    ):
        url = urljoin(BASE_FACEBOOK_URL, url)
    else:
        if not is_facebook_url(url):
            return None

    splitted = safe_urlsplit(url)

    if not splitted.path or splitted.path == '/':
        return None

    # Obvious post path
    if '/posts/' in splitted.path:
        parts = urlpathsplit(splitted.path)

        parent_id_or_handle = parts[0]

        if NUMERIC_ID_RE.match(parent_id_or_handle):
GitHub: medialab/ural, file ural/facebook.py (view on GitHub, external link)
def url(self):
        """Build the Facebook url for this object.

        When a handle is set, the url is BASE_FACEBOOK_URL joined with
        '/<handle>'. Otherwise it is joined with the numeric-id form
        '/profile.php?id=<id>'.
        """
        if self.handle is not None:
            handle_path = '/%s' % self.handle
            return urljoin(BASE_FACEBOOK_URL, handle_path)

        # No handle available: fall back to the id-based profile path.
        profile_path = '/profile.php?id=%s' % self.id
        return urljoin(BASE_FACEBOOK_URL, profile_path)