Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from ural.normalize_url import normalize_url
from ural.ensure_protocol import ensure_protocol
# Print each URL listed in the AMP fixture file, normalized and with a protocol.
with open('./scripts/data/amp-urls.txt') as urls_file:
    for raw_line in urls_file:
        # After trimming whitespace, the first and last characters are dropped
        # (presumably wrapping quotes around each URL -- verify against the file).
        unwrapped = raw_line.strip()[1:-1]
        print(ensure_protocol(normalize_url(unwrapped)))
def normalized_lru_stems(url, tld_aware=False, **kwargs):
    """
    Compute the LRU stems of a url after normalizing it.

    Args:
        url (str): Target URL as a string.
        tld_aware (bool, optional): Forwarded unchanged to
            `lru_stems_from_parsed_url`. Defaults to `False`.
        **kwargs: Extra keyword arguments forwarded to `normalize_url`.

    Returns:
        Whatever `lru_stems_from_parsed_url` returns for the normalized url.
    """
    # A protocol is ensured first; `unsplit=False` is passed so the result of
    # `normalize_url` can be handed over as a parsed url rather than re-parsed.
    normalized = normalize_url(ensure_protocol(url), unsplit=False, **kwargs)
    return lru_stems_from_parsed_url(normalized, tld_aware=tld_aware)
url (str): Target URL as a string.
sort_query (bool, optional): Whether to sort query items or not.
Defaults to `True`.
strip_authentication (bool, optional): Whether to drop authentication.
Defaults to `True`.
strip_trailing_slash (bool, optional): Whether to drop trailing slash.
Defaults to `False`.
strip_index (bool, optional): Whether to drop trailing index at the end
of the url. Defaults to `True`.
Returns:
list: The normalized lru, with a prefix identifying the type of each part.
"""
full_url = ensure_protocol(url, protocol=default_protocol)
return parsed_url_to_lru(normalize_url(
full_url, parsed=True, **kwargs))
def normalized_lru_stems(url, tld_aware=False, **kwargs):
    """
    Compute the LRU stems of a url after normalizing it.

    Args:
        url (str): Target URL as a string.
        tld_aware (bool, optional): Forwarded unchanged to
            `lru_stems_from_parsed_url`. Defaults to `False`.
        **kwargs: Extra keyword arguments forwarded to `normalize_url`.

    Returns:
        Whatever `lru_stems_from_parsed_url` returns for the normalized url.
    """
    # A protocol is ensured first; `unsplit=False` is passed so the result of
    # `normalize_url` can be handed over as a parsed url rather than re-parsed.
    normalized = normalize_url(ensure_protocol(url), unsplit=False, **kwargs)
    return lru_stems_from_parsed_url(normalized, tld_aware=tld_aware)