Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def parse(raw_url, custom_rules=None, user_agent=None):
    """Describe a referrer given its URL and, optionally, a user agent.

    Args:
        raw_url: Referrer URL. ``None`` or an empty value is accepted when a
            ``user_agent`` is supplied; the user-agent derived values are
            then used as fallbacks. Surrounding whitespace and NUL
            characters are removed. Byte strings are round-tripped through
            UTF-8 (and stay byte strings); text strings stay text.
        custom_rules: Optional mapping used instead of ``Referrer.rules``.
            A falsy value (including an empty mapping) selects
            ``Referrer.rules``. Entries are read via ``['label']``,
            ``['type']`` and ``.get('parameters')``.
        user_agent: Optional user agent, handed to
            ``Referrer.extract_user_agent_info``.

    Returns:
        ``Referrer.BLANK_REFERRER`` when both ``raw_url`` and ``user_agent``
        are ``None``. Otherwise a dict with the keys ``type``, ``url``,
        ``subdomain``, ``domain``, ``label``, ``tld``, ``path``, ``query``
        and ``google_search_type``.
    """
    if raw_url is None and user_agent is None:
        return Referrer.BLANK_REFERRER

    # Only the URL may be missing at this point; normalise it to an empty
    # string so .strip() cannot fail and the user-agent fallbacks apply.
    raw_url = (raw_url or '').strip()
    if isinstance(raw_url, bytes):
        # Byte strings (Python 2 ``str``): drop NULs via a UTF-8 round trip.
        raw_url = raw_url.decode("utf-8").replace('\x00', '').encode("utf-8")
    else:
        # Text strings have no .decode() on Python 3; strip NULs directly.
        raw_url = raw_url.replace('\x00', '')

    rules = custom_rules or Referrer.rules
    url = urlparse(raw_url)
    domain_info = tldextract.extract(raw_url)
    user_agent_info = Referrer.extract_user_agent_info(user_agent)

    # Defaults; 'type', 'label' and 'query' are refined below on a rule hit.
    referrer = {
        'type': Referrer.Types.INDIRECT,
        'url': raw_url or user_agent_info['url'],
        'subdomain': domain_info.subdomain,
        'domain': domain_info.domain or user_agent_info['domain'],
        'label': domain_info.domain.title(),
        'tld': domain_info.suffix or user_agent_info['tld'],
        'path': url.path,
        'query': '',
    }

    # NOTE(review): the nesting below was reconstructed from a listing that
    # had lost its indentation - confirm against the upstream file.
    if Referrer.is_valid_url(url, domain_info):
        # First check for an exact match of the url. Then check for a match
        # with different combinations of domain, subdomain and tld.
        known_url = (
            rules.get(url.netloc + url.path)
            or rules.get(domain_info.registered_domain + url.path)
            or rules.get(url.netloc)
            or rules.get(domain_info.registered_domain)
        )
        if known_url:
            referrer['label'] = known_url['label']
            referrer['type'] = known_url['type']
            referrer['query'] = Referrer.parse_query_string(url, known_url.get('parameters'))
    elif user_agent_info['registered_domain']:
        # The URL is unusable; fall back to the domain implied by the user agent.
        known_url = rules.get(user_agent_info['registered_domain'])
        if known_url:
            referrer['label'] = known_url['label']
            referrer['type'] = known_url['type']
            referrer['query'] = Referrer.parse_query_string(url, known_url.get('parameters'))
    else:
        # A non-empty URL that failed validation is INVALID; no URL is DIRECT.
        referrer['type'] = Referrer.Types.INVALID if raw_url else Referrer.Types.DIRECT

    referrer['google_search_type'] = Referrer.google_search_type(referrer['type'], referrer['label'], referrer['path'])
    return referrer
# parse(): start of a routine that builds a referrer description dict from a
# raw URL and/or a user agent.
# NOTE(review): this copy stops right after the rule lookup; no handling of
# the lookup result and no return statement are visible before the next
# definition begins.
def parse(raw_url, custom_rules=None, user_agent=None):
# With neither a URL nor a user agent, the predefined blank referrer is returned.
if raw_url is None and user_agent is None:
return Referrer.BLANK_REFERRER
# NOTE(review): raw_url can still be None here when only user_agent was
# supplied; .strip() would then raise AttributeError.
raw_url = raw_url.strip()
# A falsy custom_rules (None or empty) selects Referrer.rules.
rules = custom_rules or Referrer.rules
url = urlparse(raw_url)
domain_info = tldextract.extract(raw_url)
user_agent_info = Referrer.extract_user_agent_info(user_agent)
# Default result; 'url', 'domain' and 'tld' fall back to the user-agent
# derived values when the URL yields nothing for them.
referrer = {
'type': Referrer.Types.INDIRECT,
'url': raw_url or user_agent_info['url'],
'subdomain': domain_info.subdomain,
'domain': domain_info.domain or user_agent_info['domain'],
'label': domain_info.domain.title(),
'tld': domain_info.suffix or user_agent_info['tld'],
'path': url.path,
'query': ''
}
if Referrer.is_valid_url(url, domain_info):
# First check for an exact match of the url. Then check for a match with different combinations of domain, subdomain and tld
# Lookup order: host+path, registered domain+path, host, registered domain.
known_url = rules.get(url.netloc + url.path) \
or rules.get(domain_info.registered_domain + url.path) \
or rules.get(url.netloc) \
or rules.get(domain_info.registered_domain)
# parse(): start of a routine that builds a referrer description dict from a
# raw URL and/or a user agent.
# NOTE(review): this copy breaks off in the middle of the known_url
# expression - its last line ends with a line continuation and the next line
# starts a new definition, so it cannot be parsed as it stands.
def parse(raw_url, custom_rules=None, user_agent=None):
# With neither a URL nor a user agent, the predefined blank referrer is returned.
if raw_url is None and user_agent is None:
return Referrer.BLANK_REFERRER
# NOTE(review): raw_url can still be None here when only user_agent was
# supplied; .strip() would then raise AttributeError.
raw_url = raw_url.strip()
# A falsy custom_rules (None or empty) selects Referrer.rules.
rules = custom_rules or Referrer.rules
url = urlparse(raw_url)
domain_info = tldextract.extract(raw_url)
user_agent_info = Referrer.extract_user_agent_info(user_agent)
# Default result; 'url', 'domain' and 'tld' fall back to the user-agent
# derived values when the URL yields nothing for them.
referrer = {
'type': Referrer.Types.INDIRECT,
'url': raw_url or user_agent_info['url'],
'subdomain': domain_info.subdomain,
'domain': domain_info.domain or user_agent_info['domain'],
'label': domain_info.domain.title(),
'tld': domain_info.suffix or user_agent_info['tld'],
'path': url.path,
'query': ''
}
if Referrer.is_valid_url(url, domain_info):
# First check for an exact match of the url. Then check for a match with different combinations of domain, subdomain and tld
known_url = rules.get(url.netloc + url.path) \
or rules.get(domain_info.registered_domain + url.path) \
# parse(): start of a routine that builds a referrer description dict from a
# raw URL and/or a user agent.
# NOTE(review): this copy ends on the `if Referrer.is_valid_url(...)` header
# with no body; the next line starts a new definition.
def parse(raw_url, custom_rules=None, user_agent=None):
# With neither a URL nor a user agent, the predefined blank referrer is returned.
if raw_url is None and user_agent is None:
return Referrer.BLANK_REFERRER
# Strip whitespace, then remove NUL characters via a UTF-8 decode/encode round trip.
# NOTE(review): raw_url can still be None here when only user_agent was
# supplied (AttributeError on .strip()); also, str has no .decode() on
# Python 3, so this line presumably targets Python 2 byte strings - confirm.
raw_url = raw_url.strip().decode("utf-8").replace('\x00', '').encode("utf-8")
# A falsy custom_rules (None or empty) selects Referrer.rules.
rules = custom_rules or Referrer.rules
url = urlparse(raw_url)
domain_info = tldextract.extract(raw_url)
user_agent_info = Referrer.extract_user_agent_info(user_agent)
# Default result; 'url', 'domain' and 'tld' fall back to the user-agent
# derived values when the URL yields nothing for them.
referrer = {
'type': Referrer.Types.INDIRECT,
'url': raw_url or user_agent_info['url'],
'subdomain': domain_info.subdomain,
'domain': domain_info.domain or user_agent_info['domain'],
'label': domain_info.domain.title(),
'tld': domain_info.suffix or user_agent_info['tld'],
'path': url.path,
'query': ''
}
if Referrer.is_valid_url(url, domain_info):
def parse(raw_url, custom_rules=None, user_agent=None):
    """Describe a referrer given its URL and, optionally, a user agent.

    Args:
        raw_url: Referrer URL. ``None`` or an empty value is accepted when a
            ``user_agent`` is supplied; the user-agent derived values are
            then used as fallbacks. Surrounding whitespace is removed.
        custom_rules: Optional mapping used instead of ``Referrer.rules``.
            A falsy value (including an empty mapping) selects
            ``Referrer.rules``. Entries are read via ``['label']``,
            ``['type']`` and ``.get('parameters')``.
        user_agent: Optional user agent, handed to
            ``Referrer.extract_user_agent_info``.

    Returns:
        ``Referrer.BLANK_REFERRER`` when both ``raw_url`` and ``user_agent``
        are ``None``. Otherwise a dict with the keys ``type``, ``url``,
        ``subdomain``, ``domain``, ``label``, ``tld``, ``path`` and
        ``query``.
    """
    if raw_url is None and user_agent is None:
        return Referrer.BLANK_REFERRER

    # Only the URL may be missing at this point; normalise it to an empty
    # string so .strip() cannot fail and the user-agent fallbacks apply.
    raw_url = (raw_url or '').strip()

    rules = custom_rules or Referrer.rules
    url = urlparse(raw_url)
    domain_info = tldextract.extract(raw_url)
    user_agent_info = Referrer.extract_user_agent_info(user_agent)

    # Defaults; 'type', 'label' and 'query' are refined below on a rule hit.
    referrer = {
        'type': Referrer.Types.INDIRECT,
        'url': raw_url or user_agent_info['url'],
        'subdomain': domain_info.subdomain,
        'domain': domain_info.domain or user_agent_info['domain'],
        'label': domain_info.domain.title(),
        'tld': domain_info.suffix or user_agent_info['tld'],
        'path': url.path,
        'query': '',
    }

    # NOTE(review): the nesting below was reconstructed from a listing that
    # had lost its indentation - confirm against the upstream file.
    if Referrer.is_valid_url(url, domain_info):
        # First check for an exact match of the url. Then check for a match
        # with different combinations of domain, subdomain and tld.
        known_url = (
            rules.get(url.netloc + url.path)
            or rules.get(domain_info.registered_domain + url.path)
            or rules.get(url.netloc)
            or rules.get(domain_info.registered_domain)
        )
        if known_url:
            referrer['label'] = known_url['label']
            referrer['type'] = known_url['type']
            referrer['query'] = Referrer.parse_query_string(url, known_url.get('parameters'))
    elif user_agent_info['registered_domain']:
        # The URL is unusable; fall back to the domain implied by the user agent.
        known_url = rules.get(user_agent_info['registered_domain'])
        if known_url:
            referrer['label'] = known_url['label']
            referrer['type'] = known_url['type']
            referrer['query'] = Referrer.parse_query_string(url, known_url.get('parameters'))
    else:
        # A non-empty URL that failed validation is INVALID; no URL is DIRECT.
        referrer['type'] = Referrer.Types.INVALID if raw_url else Referrer.Types.DIRECT

    return referrer
# parse(): routine that builds a referrer description dict from a raw URL
# and/or a user agent.
# NOTE(review): this copy is cut off inside the user-agent branch; the
# 'query' assignment seen in other copies, the final else branch and the
# return statement are not visible before the next definition begins.
def parse(raw_url, custom_rules=None, user_agent=None):
# With neither a URL nor a user agent, the predefined blank referrer is returned.
if raw_url is None and user_agent is None:
return Referrer.BLANK_REFERRER
# Strip whitespace, then remove NUL characters via a UTF-8 decode/encode round trip.
# NOTE(review): raw_url can still be None here when only user_agent was
# supplied (AttributeError on .strip()); also, str has no .decode() on
# Python 3, so this line presumably targets Python 2 byte strings - confirm.
raw_url = raw_url.strip().decode("utf-8").replace('\x00', '').encode("utf-8")
# A falsy custom_rules (None or empty) selects Referrer.rules.
rules = custom_rules or Referrer.rules
url = urlparse(raw_url)
domain_info = tldextract.extract(raw_url)
user_agent_info = Referrer.extract_user_agent_info(user_agent)
# Default result; 'url', 'domain' and 'tld' fall back to the user-agent
# derived values when the URL yields nothing for them.
referrer = {
'type': Referrer.Types.INDIRECT,
'url': raw_url or user_agent_info['url'],
'subdomain': domain_info.subdomain,
'domain': domain_info.domain or user_agent_info['domain'],
'label': domain_info.domain.title(),
'tld': domain_info.suffix or user_agent_info['tld'],
'path': url.path,
'query': ''
}
# NOTE(review): the statements from here to the closing brace repeat the
# preceding ones verbatim (same parsing calls, same dict) and look redundant.
url = urlparse(raw_url)
domain_info = tldextract.extract(raw_url)
user_agent_info = Referrer.extract_user_agent_info(user_agent)
referrer = {
'type': Referrer.Types.INDIRECT,
'url': raw_url or user_agent_info['url'],
'subdomain': domain_info.subdomain,
'domain': domain_info.domain or user_agent_info['domain'],
'label': domain_info.domain.title(),
'tld': domain_info.suffix or user_agent_info['tld'],
'path': url.path,
'query': ''
}
if Referrer.is_valid_url(url, domain_info):
# First check for an exact match of the url. Then check for a match with different combinations of domain, subdomain and tld
# Lookup order: host+path, registered domain+path, host, registered domain.
known_url = rules.get(url.netloc + url.path)\
or rules.get(domain_info.registered_domain + url.path)\
or rules.get(url.netloc)\
or rules.get(domain_info.registered_domain)
# On a rule hit, the rule's label/type replace the defaults and the query is
# extracted using the rule's optional 'parameters' entry.
if known_url:
referrer['label'] = known_url['label']
referrer['type'] = known_url['type']
referrer['query'] = Referrer.parse_query_string(url, known_url.get('parameters'))
# Otherwise try the registered domain derived from the user agent.
elif user_agent_info['registered_domain']:
known_url = rules.get(user_agent_info['registered_domain'])
if known_url:
referrer['label'] = known_url['label']
referrer['type'] = known_url['type']
def parse(raw_url, custom_rules=None, user_agent=None):
if raw_url is None and user_agent is None:
return Referrer.BLANK_REFERRER
raw_url = raw_url.strip().decode("utf-8").replace('\x00', '').encode("utf-8")
rules = custom_rules or Referrer.rules
url = urlparse(raw_url)
domain_info = tldextract.extract(raw_url)
user_agent_info = Referrer.extract_user_agent_info(user_agent)
referrer = {
'type': Referrer.Types.INDIRECT,
'url': raw_url or user_agent_info['url'],
'subdomain': domain_info.subdomain,
'domain': domain_info.domain or user_agent_info['domain'],
'label': domain_info.domain.title(),
'tld': domain_info.suffix or user_agent_info['tld'],
'path': url.path,
'query': ''
}
if Referrer.is_valid_url(url, domain_info):
# First check for an exact match of the url. Then check for a match with different combinations of domain, subdomain and tld
known_url = rules.get(url.netloc + url.path)\
or rules.get(domain_info.registered_domain + url.path)\
or rules.get(url.netloc)\
or rules.get(domain_info.registered_domain)