def parse_rlslog(self, rlslog_url, task):
    """
    :param rlslog_url: Url to parse from
    :param task: Task instance
    :return: List of release dictionaries
    """
    # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/
    soup = get_soup(task.requests.get(rlslog_url, timeout=25).content)
    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        h3 = entry.find('h3', attrs={'class': 'entrytitle'})
        if not h3:
            log.debug('FAIL: No h3 entrytitle')
            continue
        release['title'] = h3.a.contents[0].strip()
        entrybody = entry.find('div', attrs={'class': 'entrybody'})
        if not entrybody:
            log.debug('FAIL: No entrybody')
            continue
        log.trace('Processing title %s' % (release['title']))
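# Hedged sketch (not the actual FlexGet implementation): every snippet here relies on
# flexget.utils.soup.get_soup. Conceptually it behaves like a thin BeautifulSoup wrapper
# that accepts raw bytes or text; the default parser below is an assumption.
from bs4 import BeautifulSoup

def get_soup_sketch(html, parser='html5lib'):
    # Accepts bytes (e.g. response.content) or str; BeautifulSoup handles decoding itself,
    # which is why the callers above pass undecoded .content rather than unicode text.
    return BeautifulSoup(html, parser)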
def parse_download_page(self, url, requests):
    txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    try:
        page = requests.get(url, headers=txheaders)
    except requests.exceptions.RequestException as e:
        msg = 'Cannot open "%s" : %s' % (url, str(e))
        log.error(msg)
        raise UrlRewritingError(msg)
    try:
        soup = get_soup(page.text)
    except Exception as e:
        raise UrlRewritingError(str(e))
    down_link = soup.find('a', attrs={'href': re.compile(r"down\.php\?.*")})
    if not down_link:
        raise UrlRewritingError('Unable to locate download link from url "%s"' % url)
    return 'http://bt.hliang.com/' + down_link.get('href')
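# Hedged usage sketch: in FlexGet, a URL rewriter plugin typically funnels entry['url']
# through a helper like parse_download_page above. The method name and the task.requests
# session follow the usual plugin convention; treat this as illustrative, not the
# plugin's verbatim code.
def url_rewrite(self, task, entry):
    # Swap the detail-page URL for the direct .torrent link resolved by the helper.
    entry['url'] = self.parse_download_page(entry['url'], task.requests)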
else:
    category = CATEGORIES.get(config.get('category', 'all'))
filter_url = '/0/%d/%d' % (sort, category)
entries = set()
for search_string in entry.get('search_strings', [entry['title']]):
    query = normalize_unicode(search_string)
    # TPB search doesn't like dashes or quotes
    query = query.replace('-', ' ').replace("'", " ")
    # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
    url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
    log.debug('Using %s as piratebay search url' % url)
    page = task.requests.get(url).content
    soup = get_soup(page)
    for link in soup.find_all('a', attrs={'class': 'detLink'}):
        entry = Entry()
        entry['title'] = self.extract_title(link)
        if not entry['title']:
            log.error('Malformed search result. No title or url found. Skipping.')
            continue
        href = link.get('href')
        if href.startswith('/'):  # relative link?
            href = self.url + href
        entry['url'] = href
        tds = link.parent.parent.parent.find_all('td')
        entry['torrent_seeds'] = int(tds[-2].contents[0])
        entry['torrent_leeches'] = int(tds[-1].contents[0])
        entry['torrent_availability'] = torrent_availability(
            entry['torrent_seeds'], entry['torrent_leeches']
        )
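# Hedged sketch of the torrent_availability helper used above: FlexGet ranks results by
# swarm health, weighting seeds more heavily than leeches. The exact weighting here is
# an assumption, not the library's verbatim formula.
def torrent_availability_sketch(seeds, leeches):
    # More seeds means a healthier torrent; leeches still indicate some availability.
    return seeds * 2 + leeches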
def _solveCaptcha(self, output, url_auth, params, opener):
    """
    When trying to connect too many times with wrong password, a captcha can be requested.
    This captcha is really simple and can be solved by the provider.
    <label for="pass">204 + 65 = </label>
    <input value="" id="lgn" name="captchaAnswer" size="40" type="text">
    <input value="204 + 65 = " name="captchaQuery" type="hidden">
    <input value="005d54a7428aaf587460207408e92145" name="captchaToken" type="hidden">
    <br>
    :param output: initial login output
    :return: output after captcha resolution
    """
    html = get_soup(output)
    query = html.find('input', {'name': 'captchaQuery'})
    token = html.find('input', {'name': 'captchaToken'})
    if not query or not token:
        log.error('Unable to solve login captcha.')
        return output
    query_expr = query.attrs['value'].strip('= ')
    log.debug('Captcha query: ' + query_expr)
    answer = arithmeticEval(query_expr)
    log.debug('Captcha answer: %s' % answer)
    params['captchaAnswer'] = answer
    params['captchaQuery'] = query.attrs['value']
    params['captchaToken'] = token.attrs['value']
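# The captcha query is plain arithmetic such as "204 + 65". A hedged sketch of an
# arithmeticEval-style helper (assumed, not the plugin's actual implementation) can
# evaluate it safely with the ast module instead of eval():
import ast
import operator

_OPS = {ast.Add: operator.add, ast.Sub: operator.sub,
        ast.Mult: operator.mul, ast.Div: operator.truediv}

def arithmetic_eval_sketch(expr):
    """Evaluate a simple arithmetic expression like '204 + 65' without eval()."""
    def _eval(node):
        if isinstance(node, ast.Expression):
            return _eval(node.body)
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in _OPS:
            return _OPS[type(node.op)](_eval(node.left), _eval(node.right))
        raise ValueError('unsupported expression: %r' % expr)
    return _eval(ast.parse(expr, mode='eval'))

# arithmetic_eval_sketch('204 + 65') -> 269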
"""
Gets the download information for 1337x result
"""
url = entry['url']
log.info('1337x rewriting download url: %s' % url)
try:
page = task.requests.get(url)
log.debug('requesting: %s', page.url)
except RequestException as e:
log.error('1337x request failed: %s', e)
raise UrlRewritingError('1337x request failed: %s', e)
soup = get_soup(page.content)
magnet_url = str(soup.find('a', href=re.compile(r'^magnet:\?')).get('href')).lower()
torrent_url = str(soup.find('a', href=re.compile(r'\.torrent$')).get('href')).lower()
entry['url'] = torrent_url
entry.setdefault('urls', []).append(torrent_url)
entry['urls'].append(magnet_url)
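# Illustrative note (an assumption about downstream behaviour): entry['urls'] acts as an
# ordered list of alternatives, so the .torrent link appended first is tried before the
# magnet link. Minimal example on a plain dict, since Entry is dict-like:
e = {}
e.setdefault('urls', []).append('http://example.org/some.torrent')  # preferred
e['urls'].append('magnet:?xt=urn:btih:0000')                        # fallback
assert e['urls'][0].endswith('.torrent')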
def parse_site(self, url, task):
    """Parse configured url and return releases array"""
    try:
        page = task.requests.get(url).content
    except RequestException as e:
        raise plugin.PluginError('Error getting input page: %s' % e)
    soup = get_soup(page)
    releases = []
    for entry in soup.find_all('div', attrs={'class': 'entry'}):
        release = {}
        title = entry.find('h2')
        if not title:
            log.debug('No h2 entrytitle')
            continue
        release['title'] = title.a.contents[0].strip()
        log.debug('Processing title %s' % (release['title']))
        for link in entry.find_all('a'):
            # no content in the link
            if not link.contents:
                continue
def entries_from_search(self, name, url=None):
    """Parses torrent download url from search results"""
    name = normalize_unicode(name)
    if not url:
        url = 'http://www.newtorrents.info/search/%s' % quote(
            name.encode('utf-8'), safe=b':/~?=&%'
        )
    log.debug('search url: %s' % url)
    html = requests.get(url).text
    # fix so that BS does not crash
    # TODO: should use beautifulsoup massage
    html = re.sub(r'()', r'\1\2', html)
    soup = get_soup(html)
    # saving torrents in dict
    torrents = []
    for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
        torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
        release_name = link.parent.next.get('title')
        # quick dirty hack
        seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
        if seed == 'n/a':
            seed = 0
        else:
            try:
                seed = int(seed)
            except ValueError:
                log.warning(
                    'Error converting seed value (%s) from newtorrents to integer.' % seed
                )
except Exception:
    discount = expired_time = None  # no discount found
try:
    if hr_fn:
        hr = hr_fn(detail_page)
    else:
        hr = False
        for item in ['hitandrun', 'hit_run.gif', 'Hit and Run', 'Hit & Run']:
            if item in detail_page.text:
                hr = True
                break
except Exception:
    hr = False  # no hit-and-run flag found
soup = get_soup(peer_page.replace('\n', ''), 'html5lib')
seeders = leechers = []
tables = soup.find_all('table', limit=2)
if len(tables) == 2:  # 1. both seeder and leecher tables present
    seeders = NexusPHP.get_peers(tables[0])
    leechers = NexusPHP.get_peers(tables[1])
elif len(tables) == 1 and len(soup.body.contents) == 3:  # 2. only one of the two is present
    nodes = soup.body.contents
    if nodes[1].name == 'table':  # 2.1 only seeders, in the second node
        seeders = NexusPHP.get_peers(nodes[1])
    else:  # 2.2 only leechers, in the third node
        leechers = NexusPHP.get_peers(nodes[2])
else:  # 3. neither seeders nor leechers
    seeders = leechers = []
return discount, seeders, leechers, hr, expired_time
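# Hedged sketch of what a peer-table parser such as NexusPHP.get_peers might do; the real
# method and the tracker's column layout are assumptions. It reads the header row of one
# of the tables selected above and collects one dict per peer row.
def get_peers_sketch(table):
    peers = []
    rows = table.find_all('tr')
    if not rows:
        return peers
    headers = [cell.get_text(strip=True) for cell in rows[0].find_all(['td', 'th'])]
    for row in rows[1:]:
        cells = [cell.get_text(strip=True) for cell in row.find_all('td')]
        if cells:
            peers.append(dict(zip(headers, cells)))
    return peers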
log.debug('Search Descargas2020')
url_search = 'https://descargas2020.org/buscar'
results = set()
for search_string in entry.get('search_strings', [entry['title']]):
    query = normalize_unicode(search_string)
    query = re.sub(r' \(\d\d\d\d\)$', '', query)
    log.debug('Searching Descargas2020 %s', query)
    query = unicodedata.normalize('NFD', query).encode('ascii', 'ignore')
    data = {'q': query}
    try:
        response = task.requests.post(url_search, data=data)
    except requests.RequestException as e:
        log.error('Error searching Descargas2020: %s', e)
        return results
    content = response.content
    soup = get_soup(content)
    soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
    children = soup2.findAll('a', href=True)
    for child in children:
        entry = Entry()
        entry['url'] = child['href']
        entry_title = child.find('h2')
        if entry_title is None:
            log.debug('Ignore empty entry')
            continue
        entry_title = entry_title.text
        if not entry_title:
            continue
        try:
            entry_quality_lan = re.search(
                r'.+ \[([^\]]+)\](\[[^\]]+\])+$', entry_title
            ).group(1)
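# Worked example for the quality/language regex above; the sample title is illustrative
# only. The first bracketed group after the title text lands in group(1):
m = re.search(r'.+ \[([^\]]+)\](\[[^\]]+\])+$',
              'Some Show - Temporada 1 [HDTV 720p][Cap.101][AC3 5.1 Castellano]')
# m.group(1) == 'HDTV 720p'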
def horrible_entries(requests, page_url):
    entries = []
    try:
        soup = get_soup(requests.get(page_url).content)
    except RequestException as e:
        log.error('HorribleSubs request failed: %s', e)
        return entries
    for li_label in soup.findAll('li'):
        title = '[HorribleSubs] {0}{1}'.format(
            str(li_label.find('span').next_sibling), str(li_label.find('strong').text)
        )
        log.debug('Found title `%s`', title)
        url = li_label.find('a')['href']
        episode = re.sub(r'.*#', '', url)
        # Get show ID
        try:
            soup = get_soup(requests.get('https://horriblesubs.info/{0}'.format(url)).content)
        except RequestException as e:
            log.error('HorribleSubs request failed: %s', e)
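# Illustrative example of the episode extraction above (the sample URL is hypothetical):
# everything up to and including '#' is stripped, leaving the fragment as the episode.
assert re.sub(r'.*#', '', '/shows/some-show#07') == '07'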