How to use the comiccrawler.core.Episode class in comiccrawler

To help you get started, we’ve selected a few comiccrawler examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github eight04 / ComicCrawler / comiccrawler / mods / yoedge.py View on Github external
def get_episodes(html, url):
	"""Extract episode links from a yoedge gallery page.

	html -- page source; everything from the bottom toolbar down is ignored
	url  -- page URL (unused; episode hrefs are already absolute)
	Returns a list of Episode objects in document order.
	"""
	# Truncate at the fixed bottom bar so footer links are not picked up.
	html = html[:html.index("am-topbar-fixed-bottom")]
	s = []

	# NOTE(review): the original pattern was garbled by HTML extraction
	# (`<a>]*?`, `&gt;`, `&lt;` entities, stray </a>); reconstructed here to
	# match <a> tags whose href contains "/smp-app/" and capture the link
	# text as the title — confirm against upstream yoedge.py.
	for m in re.finditer(r'<a[^>]*?href="([^"]*?/smp-app/[^"]*)">([^<]+)', html):
		ep_url, title = m.groups()
		s.append(Episode(title, ep_url))
	return s
github eight04 / ComicCrawler / comiccrawler / mods / weibo.py View on Github external
def get_episodes(html, url):
	"""Extract episode links from a tw.weibo.com page.

	If *url* is itself a single-post URL, treat it as one image episode;
	otherwise scrape all post links from the listing page.
	Returns a list of Episode objects, oldest first (page order reversed).
	"""
	# Raw string so \w and \d are regex escapes, not invalid string escapes.
	if re.match(r"http://tw\.weibo\.com/\w+/\d+$", url):
		return [Episode("image", url)]

	s = []
	# Dots escaped so "." matches a literal dot in the hostname.
	pattern = r'class="img_link" href="(http://tw\.weibo\.com/(\w+/\d+))">'
	for match in re.finditer(pattern, html):
		ep_url, ep_title = match.groups()
		s.append(Episode(ep_title, ep_url))
	return s[::-1]
github eight04 / ComicCrawler / comiccrawler / mods / buka.py View on Github external
def get_episodes(html, url):
	"""Extract chapter links from a buka comic page.

	html -- page source
	url  -- page URL, used to resolve the site-relative /view/ hrefs
	Returns a list of Episode objects, oldest first (page order reversed).
	"""
	# NOTE(review): the extracted pattern began with ']+href=' and used
	# &gt;/&lt; entities — the leading '<a[^>' was eaten by extraction.
	# Reconstructed as an anchor matcher capturing href and link text;
	# confirm against upstream buka.py.
	rx = r'<a[^>]+href="(/view/[^"]+)"[^>]*>([^<]+)'
	arr = []
	for match in re.finditer(rx, html):
		ep_url, title = match.groups()
		title = title.strip()
		arr.append(Episode(title, urljoin(url, ep_url)))
	return arr[::-1]
github eight04 / ComicCrawler / comiccrawler / mods / pixiv.py View on Github external
def get_episodes_from_works(works):
	"""Build one Episode per pixiv work, ordered by ascending numeric id.

	works -- iterable of dicts with at least "id" and "title" keys
	Returns a list of Episode objects titled "<id> - <title>", each linking
	to the medium-mode member_illust page for that work.
	"""
	ordered = sorted(works, key=lambda work: int(work["id"]))
	return [
		Episode(
			"{} - {}".format(work["id"], work["title"]),
			"https://www.pixiv.net/member_illust.php?mode=medium&illust_id={}".format(work["id"])
		)
		for work in ordered
	]
github eight04 / ComicCrawler / comiccrawler / mods / sankaku.py View on Github external
def get_episodes(html, url):
	"""Extract post Episodes from a sankaku listing page, oldest first.

	html -- page source (checked for a valid login session first)
	url  -- page URL; its scheme+host prefix anchors the relative post paths
	"""
	login_check(html)
	# Scheme + host of the listing page, e.g. "https://chan.sankakucomplex.com".
	base = re.search("(https?://[^/]+)", url).group(1)
	episodes = []
	# Posts appear as hrefs like "/post/show/<id>" (optionally language-prefixed).
	for match in re.finditer(r'href="(/(?:[^/]*/)?post/show/(\d+))"', html):
		path, post_id = match.groups()
		episodes.append(Episode(post_id, base + path))
	episodes.reverse()
	return episodes
github eight04 / ComicCrawler / comiccrawler / mods / tsundora.py View on Github external
def get_episodes(html, url):
	"""Extract post Episodes from a tsundora listing page.

	html -- page source
	url  -- page URL (unused; post hrefs are absolute)
	Returns a list of Episode objects titled "<id> - <title>", oldest first.
	"""
	s = []
	# NOTE(review): the extracted pattern had no whitespace between the
	# href/class/title attributes, which can never match real markup;
	# restored as \s+ and made both fragments raw strings — confirm
	# against upstream tsundora.py.
	for match in re.finditer(
			r'href="(http://tsundora\.com/(\d+))"\s+class="img_hover_trans"'
			r'\s+title="([^"]+)"', html):
		ep_url, post_id, title = match.groups()
		s.append(Episode(post_id + " - " + title, ep_url))
	return s[::-1]
github eight04 / ComicCrawler / comiccrawler / mods / dmzj_m.py View on Github external
def get_episodes(html, url):
	"""Parse the chapter list embedded in a dmzj mobile comic page.

	html -- page source containing an ``initIntroData(...)`` JS call
	url  -- page URL, used to resolve the site-relative chapter links
	Returns a list of Episode objects sorted by ``chapter_order``.
	"""
	# Capture everything between "initIntroData" and the next ";" — i.e. the
	# parenthesized JS argument (the regex parens are a capture group, so the
	# JS call's own parentheses are included in group(1)).
	data_js = re.search("initIntroData(.+?);", html, re.DOTALL).group(1)
	# SECURITY NOTE(review): eval() on site-provided text executes arbitrary
	# Python; the captured literal is presumably JSON-like, so consider
	# ast.literal_eval or json.loads — left as-is to preserve behavior.
	data = eval(data_js)

	# Flatten each category's chapter list into a single list, then sort
	# chapters globally by their declared order.
	ep_data = []
	for category in data:
		ep_data += category["data"]
	ep_data = sorted(ep_data, key=lambda data: data["chapter_order"])

	episodes = []

	for data in ep_data:
		# Chapter URLs are site-relative: /view/<comic_id>/<chapter_id>.html
		ep_url = "/view/{}/{}.html".format(data["comic_id"], data["id"])
		title = data["title"] + data["chapter_name"]
		episodes.append(Episode(title, urljoin(url, ep_url)))

	return episodes
github eight04 / ComicCrawler / comiccrawler / mods / nijie.py View on Github external
def get_episodes(html, url):
	s = []
	ep_set = set()
	
	for m in re.finditer(r'
github eight04 / ComicCrawler / comiccrawler / mods / wix.py View on Github external
s = []
	
	for page in pages:
		try:
			data = grabhtml(page["urls"][0])
		except KeyError:
			data = grabhtml("https://static.wixstatic.com/sites/" + page["pageJsonFileName"] + ".z?v=3")
		
		data = json.loads(data)
		data = data["data"]["document_data"]
		
		for item in data.values():
			if item["type"] != "Image":
				continue
			s.append(Episode(
				"{} - {}".format(page["title"], trim_ext(item.get("title", "")) or item["id"])		,
				"https://static.wixstatic.com/media/" + item["uri"],
				image="https://static.wixstatic.com/media/" + item["uri"]
			))
			
	return s