Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# -*- coding: utf-8 -*-
"""Stuff to pull from a New Yorker article."""
import scrapy
class NewYorkerItem(scrapy.Item):
    """Container for the data pulled from one New Yorker article.

    Declares four fields: ``title``, ``author``, ``text`` and ``link``.
    """

    title = scrapy.Field()
    author = scrapy.Field()
    text = scrapy.Field()
    link = scrapy.Field()
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class VideoItem(scrapy.Item):
    """Scraped record describing a single video."""

    name = scrapy.Field()        # name
    short_desc = scrapy.Field()  # one-sentence description
    score = scrapy.Field()       # rating
    stars = scrapy.Field()       # leading actors
    hot = scrapy.Field()         # play count
    play_url = scrapy.Field()    # playback URL
    img = scrapy.Field()         # image
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class BangumiItem(scrapy.Item):
    """Placeholder item that declares no fields of its own."""
class BangumiIdItem(scrapy.Item):
    """Item carrying the id, type and name of one Bangumi entry.

    ``create`` and ``update`` are presumably creation / last-update
    markers -- confirm against the spider that fills them.
    """

    bangumi_id = scrapy.Field()
    bangumi_type = scrapy.Field()
    bangumi_name = scrapy.Field()
    create = scrapy.Field()
    update = scrapy.Field()
class BangumiIdListItem(scrapy.Item):
    """Item with a single ``bangumi_data`` field."""

    bangumi_data = scrapy.Field()
class BangumiBookIDsItem(scrapy.Item):
    """Item with a single ``bangumi_id_set`` field."""

    bangumi_id_set = scrapy.Field()
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class BaiduBaikeItem(scrapy.Item):
    """Scraped record for one Baidu Baike page.

    Field meanings are not visible in this file; the names below are the
    keys the spider/pipelines are expected to populate.
    """

    # Identity and main content.
    title = scrapy.Field()
    title_id = scrapy.Field()
    abstract = scrapy.Field()
    infobox = scrapy.Field()
    subject = scrapy.Field()

    # Disambiguation / redirect information.
    disambi = scrapy.Field()
    redirect = scrapy.Field()

    # Links and pictures.
    curLink = scrapy.Field()
    interPic = scrapy.Field()
    interLink = scrapy.Field()
    exterLink = scrapy.Field()
    relateLemma = scrapy.Field()

    all_text = scrapy.Field()
# 每条微博的所有评论.
comment_list = scrapy.Field()
size = scrapy.Field()
# 爬取时间. 年月日.
crawl_date = scrapy.Field()
class ForwardItem(scrapy.Item):
    """Forwards (reposts) collected for one Weibo post."""

    user_id = scrapy.Field()
    post_id = scrapy.Field()
    forward_list = scrapy.Field()  # all forwards of the post
    size = scrapy.Field()
    crawl_date = scrapy.Field()    # crawl time: year, month, day
class ThumbupItem(scrapy.Item):
    """Likes (thumb-ups) collected for one Weibo post."""

    user_id = scrapy.Field()
    post_id = scrapy.Field()
    thumbup_list = scrapy.Field()  # all likes of the post
    size = scrapy.Field()
    crawl_date = scrapy.Field()    # crawl time: year, month, day
pass
class KeyWordLink(AbstractLink):
    """``AbstractLink`` subclass for keyword links; adds nothing of its own."""
class TutorLink(AbstractLink):
    """``AbstractLink`` subclass for tutor links; adds nothing of its own."""
class OrganizationLink(AbstractLink):
    """``AbstractLink`` subclass for organization links; adds nothing of its own."""
class PaperDetail(scrapy.Item):
    """Scraped details of a single paper.

    Instances are normally built through :meth:`new_instance`.
    """

    # scrapy.Item raises KeyError when an undeclared key is assigned, so
    # every key that new_instance() populates must be declared here.
    url = scrapy.Field()
    title = scrapy.Field()
    authors = scrapy.Field()
    organizations = scrapy.Field()
    abstract = scrapy.Field()
    tutors = scrapy.Field()
    catalog = scrapy.Field()
    page_num = scrapy.Field()
    size = scrapy.Field()
    doi = scrapy.Field()
    keywords = scrapy.Field()
    download_num = scrapy.Field()

    @staticmethod
    def new_instance(url, title, authors, organizations, abstract, page_num, size,
                     catalog=None, tutors=None, doi=None, keywords=None, download_num=None):
        """Build a fully populated ``PaperDetail``.

        The first seven arguments are required; ``catalog``, ``tutors``,
        ``doi``, ``keywords`` and ``download_num`` default to ``None`` and
        are stored as given.

        Returns:
            The newly created ``PaperDetail`` item.
        """
        res = PaperDetail()
        res["url"] = url
        res["title"] = title
        res["authors"] = authors
        res["organizations"] = organizations
        res["abstract"] = abstract
        res["tutors"] = tutors
        res["catalog"] = catalog
        res["page_num"] = page_num
        res["size"] = size
        res["doi"] = doi
        res["keywords"] = keywords
        res["download_num"] = download_num
        # Without this return the factory silently yields None.
        return res
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class EdbspiderItem(scrapy.Item):
    """Scraped record with id, title, author, date, type and platform."""

    # NOTE: ``id`` and ``type`` shadow builtins inside the class body, but
    # they are the item keys callers use, so the names are kept.
    id = scrapy.Field()
    title = scrapy.Field()
    author = scrapy.Field()
    date = scrapy.Field()
    type = scrapy.Field()
    platform = scrapy.Field()
    # category = scrapy.Field()
class DownloadItem(scrapy.Item):
    """Item describing files to download.

    NOTE(review): ``file_urls`` / ``files`` match the default field names
    of Scrapy's FilesPipeline -- confirm that pipeline is enabled.
    """

    files = scrapy.Field()
    file_urls = scrapy.Field()
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class CommonItem(scrapy.Item):
    """Generic record for one crawled web page."""

    # Page metadata.
    title = scrapy.Field()
    description = scrapy.Field()
    keywords = scrapy.Field()

    # Text content of each <p> tag.
    p_texts = scrapy.Field()

    url = scrapy.Field()

    # Timestamp at which the current page was crawled.
    crawled_timestamp = scrapy.Field()

    links = scrapy.Field()

    # Text associated with each link.
    links_text = scrapy.Field()

    # Simhash code; depends on title, description, keywords, p_texts
    # and links_text.
    simhash = scrapy.Field()
# (stray "</p>" removed: HTML-extraction artifact, not part of the source)
class HupuPost(scrapy.Item):
    """Scraped record for one Hupu post."""

    # NOTE: ``id`` and ``type`` are item keys used by callers; names kept
    # even though they shadow builtins inside the class body.
    id = scrapy.Field()
    title = scrapy.Field()
    url = scrapy.Field()
    author = scrapy.Field()
    post_time = scrapy.Field()

    # Counters.
    view_count = scrapy.Field()
    reply_count = scrapy.Field()

    content = scrapy.Field()
    type = scrapy.Field()
class HupuPostReply(scrapy.Item):
    """Scraped record for one reply to a Hupu post."""

    hupu_reply_id = scrapy.Field()
    author = scrapy.Field()
    hupu_post_id = scrapy.Field()  # presumably the id of the parent post -- confirm
    reply_time = scrapy.Field()
    like_count = scrapy.Field()
    floor_num = scrapy.Field()
    content = scrapy.Field()
class HupuImageItem(scrapy.Item):
    """Item describing images to download.

    NOTE(review): ``image_urls`` / ``images`` match the default field names
    of Scrapy's ImagesPipeline -- confirm that pipeline is enabled.
    """

    # Links of the images.
    image_urls = scrapy.Field()
    images = scrapy.Field()
    image_paths = scrapy.Field()
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
import scrapy
class NoticiasItem(scrapy.Item):
    """Scraped news record with title, author, text and link fields."""

    title = scrapy.Field()
    author = scrapy.Field()
    text = scrapy.Field()
    link = scrapy.Field()