from urllib.parse import urlparse

import tldextract


def getWarningLevel(t_tld_orig, item):
    """Rate a link against the original target's TLD parts:
    0 = whitelisted, 1 = same host, 2 = same registered domain, 3 = external.
    `t_help` is a module-level whitelist of helper strings."""
    w_level = 0

    if item in t_help:
        return 0

    if not item.startswith('http'):
        item = 'https://' + item

    tmp_parse = urlparse(item)
    tmp_tld = tldextract.extract(tmp_parse.netloc)

    if (tmp_tld.subdomain == t_tld_orig.subdomain
            and tmp_tld.domain == t_tld_orig.domain
            and tmp_tld.suffix == t_tld_orig.suffix):
        w_level = 1
    elif tmp_tld.domain == t_tld_orig.domain and tmp_tld.suffix == t_tld_orig.suffix:
        w_level = 2
    else:
        w_level = 3

    # A wildcard in the host is more suspicious: bump the level.
    if '*' in tmp_parse.netloc:
        w_level += 1

    return w_level
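# Usage sketch (not from the original source; `t_help` and the sample URLs
# are assumptions for illustration):
t_help = []  # assumed empty whitelist
orig = tldextract.extract('www.example.com')
print(getWarningLevel(orig, 'https://www.example.com/page'))    # 1: same host
print(getWarningLevel(orig, 'https://cdn.example.com/app.js'))  # 2: same registered domain
print(getWarningLevel(orig, 'https://tracker.example.net/px'))  # 3: external domain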
import socket

import requests
import tldextract
from termcolor import colored


def grabSubs(domain):
    """Query crt.sh for certificates matching *.domain, collecting new
    subdomains in the global list `t_subs` and resolved IPs in `t_ips`."""
    print("[+] Grabbing subdomains from crt.sh: %s" % domain)
    url = 'https://crt.sh/?q=%25.' + domain + '&output=json'
    try:
        ex = 0
        r = requests.get(url)
    except Exception as e:
        ex = 1
        print(colored("[-] error occurred: %s" % e, 'red'))
    if ex == 0 and r.status_code == 200:
        n = 0
        j = r.json()
        for item in j:
            parse = tldextract.extract(item['name_value'])  # subdomain/domain/suffix parts
            sub = item['name_value'].replace('*.', '')
            if sub != domain and sub not in t_subs:
                t_subs.append(sub)
                try:
                    data = socket.gethostbyname(sub)
                    if data not in t_ips:
                        n = n + 1
                        t_ips.append(data)
                except Exception:
                    pass  # resolution failed; skip this host
        print(colored("[+] %d subdomains found, %d ips added" % (len(t_subs), n), 'green'))
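# Usage sketch (assumed setup: the function mutates the module-level lists
# `t_subs` and `t_ips`, so they must exist before the call):
t_subs = []
t_ips = []
grabSubs('example.com')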
import timeit

import tldextract
from adblockparser import AdblockRules


def find_trackers(easylist_lines, third_party_requests):
    """Parse EasyList filter lines into AdblockRules, then return the
    registered domains of the third-party requests that would be blocked.
    `is_acceptable_rule` and `logger` are assumed module-level helpers."""
    start_time = timeit.default_timer()
    rules = []
    try:
        for line in easylist_lines:
            rule = line.split('$')[0]
            if is_acceptable_rule(rule):
                rules.append(rule)
    except Exception:
        logger.exception('Unexpected error while applying easylist rules.')
    abr = AdblockRules(rules)
    elapsed = timeit.default_timer() - start_time
    logger.info('Took %i secs to parse easylist rules' % elapsed)
    trackers = []
    i = 0
    for url in third_party_requests:
        if abr.should_block(url):
            ext = tldextract.extract(url)
            trackers.append("{}.{}".format(ext.domain, ext.suffix))
        i = i + 1
        if i % 20 == 0:
            elapsed = timeit.default_timer() - start_time
            logger.info("Checked %i domains, %i secs elapsed..." % (i, elapsed))
    return list(set(trackers))
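# Usage sketch (the EasyList file name and the sample URL are assumptions):
with open('easylist.txt') as f:
    tracker_domains = find_trackers(f.read().splitlines(),
                                    ['https://ads.tracker.example.net/pixel.gif'])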
import re

import requests as req
import tldextract


def get_related_domains(self):
    result = []
    main_of_domain = tldextract.extract(self.domain).domain
    reg_urls = re.compile(r'<a href="\?id=(.*?)">')
    urls = reg_urls.findall(self.resp)
    reg_domains = re.compile(r'DNS:(.*?)<br>')  # e.g. DNS:*.jdpay.com<br>
    for item in urls:
        url = "https://crt.sh/?id={0}".format(item)
        resp = req.get(url, proxies=self.proxy).text
        reg_common_name = re.compile("Subject:<br>(.*?)<br>")
        common_name = reg_common_name.findall(resp)
        if len(common_name) != 0:
            common_name = common_name[0].replace(" ", "").split("=")[-1]
            main_of_cn_domain = tldextract.extract(common_name).domain
            # Keep SAN entries from certs whose common name shares the target's main domain.
            if main_of_cn_domain == main_of_domain:
                result.extend(reg_domains.findall(resp))
    return list(set(result))
from typing import Tuple

import tldextract


def get_root_domain(full_link: str, use_www=True) -> Tuple[bool, str, str, str, str, str, str]:
    """
    Get the root domain from a URL.
    :param full_link: e.g. "http://www.google.com"
    :param use_www: prefix the root-domain link with "www." when no subdomain is present
    :return: Tuple(True if the domain is a root domain else False for a sub-domain,
             the root domain, link to the root domain, link to the sub-domain,
             the sub-domain, the suffix of the domain, the bare domain)
    """
    scheme = "http"
    if full_link.startswith("https"):
        scheme = "https"
    scheme += "://"
    ext = tldextract.extract(full_link)
    root = ext.domain + "." + ext.suffix
    prefix = "www."
    if len(ext.domain) == 0 or len(ext.suffix) == 0:
        return False, "", "", "", "", "", ""
    elif ext.subdomain is None or len(ext.subdomain) == 0:
        if use_www and prefix not in full_link:
            return True, root, scheme+prefix+root, scheme+prefix+root, prefix+root, ext.suffix, ext.domain
        else:
            return True, root, scheme+root, scheme+root, root, ext.suffix, ext.domain
    else:
        sub_domain = ext.subdomain + "." + root
        if use_www:
            return False, root, scheme+prefix+root, scheme+sub_domain, sub_domain, ext.suffix, ext.domain
        else:
            return False, root, scheme+root, scheme+sub_domain, sub_domain, ext.suffix, ext.domain
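# Usage sketch (sample URL is an assumption):
is_root, root, root_link, sub_link, sub, suffix, domain = \
    get_root_domain("http://mail.example.co.uk/inbox")
# -> (False, 'example.co.uk', 'http://www.example.co.uk',
#     'http://mail.example.co.uk', 'mail.example.co.uk', 'co.uk', 'example')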
def get_domain_from_host(validation_dns_record):
    """Given an FQDN, return the domain portion of a host."""
    domain_tld_info = tldextract.extract(validation_dns_record)
    return "%s.%s" % (domain_tld_info.domain, domain_tld_info.suffix)
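# Usage sketch (hostname is an assumption):
print(get_domain_from_host('_acme-challenge.app.example.com'))  # 'example.com'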
def _split_hostname(self, hostname):
    """Split hostname into prefix + registered domain."""
    ext = tldextract.extract(hostname)
    prefix = ext.subdomain
    domain = ext.registered_domain
    if not prefix:
        prefix = '@'  # DNS convention for the zone apex
    return prefix, domain
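# Usage sketch (illustrative hostnames; called on an instance of the class):
# self._split_hostname('www.example.com') -> ('www', 'example.com')
# self._split_hostname('example.com')     -> ('@', 'example.com')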
def _registered_domains(match):
    """Reduce regex matches to a set of plausible registered domains.
    `match` is assumed to be an iterable of `re.Match` objects found in
    scanned text."""
    result = set()
    for m in match:
        candidate = m.group(0).lower()
        if '.' not in candidate:
            continue
        if not re.match(r'[a-z]+', candidate):
            continue
        if not re.match(r'[a-z0-9]+\.[a-z0-9]', candidate):
            continue
        tld = tldextract.extract(candidate)
        if tld.suffix:
            result.add(tld.domain + '.' + tld.suffix.rstrip('.'))
    return list(result)
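# Usage sketch (the pattern and sample text are assumptions):
text = "see www.example.com and cdn.example.org, ignore localhost"
print(_registered_domains(re.finditer(r'[a-z0-9.-]+', text)))
# -> ['example.com', 'example.org'] (order may vary)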
from tldextract import extract


def _concat_full(url, suffix):
    """Rebuild the full hostname from `url` and append `suffix` to it."""
    url_extract = extract(url)
    site = url_extract.domain + "." + url_extract.suffix
    if url_extract.subdomain != "":
        site = url_extract.subdomain + "." + site
    return site + suffix
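# Usage sketch (URL and suffix are assumptions):
print(_concat_full("https://blog.example.com/post/1", "/feed"))  # 'blog.example.com/feed'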
import csv

import requests
import tldextract


def gwhEngine(target, wordlist, method, redirects=False):
    # Status codes treated as hits, with and without counting redirects.
    error_codes_non_redir = [200, 403]
    error_codes_redir = [200, 301, 302, 403]
    with open(wordlist) as dirPerLine:
        for dir in dirPerLine:
            cleanDirName = str(dir.rstrip('\n'))
            fullURL = tldextract.extract(target)
            getHostname = fullURL.domain
            resultFile = open(str(getHostname) + '.csv', 'a')
            badResults = open(str(getHostname) + '_ignored.csv', 'a')
            csvWritingObject = csv.writer(resultFile)
            BadResultObject = csv.writer(badResults)
            if method == "HEAD" and not redirects:
                gwhRequester = requests.head(target + cleanDirName, verify=False)
                gwhStatus = gwhRequester.status_code
                if gwhStatus in error_codes_non_redir:
                    csvWritingObject.writerow((target + cleanDirName, gwhStatus))
                    resultFile.close()
                    print(target + cleanDirName + " => " + str(gwhStatus))
            elif method == "HEAD" and redirects:
                gwhRequester = requests.head(target + cleanDirName, verify=False)
                gwhStatus = gwhRequester.status_code
                if gwhStatus in error_codes_redir: