How to use the scrapy.Item class in Scrapy

To help you get started, we’ve selected a few scrapy.Item examples based on popular ways it is used in public projects.
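
All of the examples below follow the same pattern: a class derives from scrapy.Item, declares its fields with scrapy.Field(), and is then populated like a dictionary. Here is a minimal sketch of that pattern (ProductItem and its fields are made up for illustration):

import scrapy


class ProductItem(scrapy.Item):
    # Every attribute assigned scrapy.Field() becomes a key the item accepts.
    name = scrapy.Field()
    price = scrapy.Field()


item = ProductItem(name="Example product")  # items support dict-like construction
item["price"] = "9.99"                      # and dict-like assignment
print(dict(item))                           # {'name': 'Example product', 'price': '9.99'}
# item["stock"] = 5 would raise a KeyError, because stock is not a declared field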


github amperser / SublimeLinter-contrib-proselint / tests / corpus / newyorker / newyorker / items.py
# -*- coding: utf-8 -*-

"""Stuff to pull from a New Yorker article."""

import scrapy


class NewYorkerItem(scrapy.Item):

    """Pull the title, author, text, and link."""

    title = scrapy.Field()
    author = scrapy.Field()
    text = scrapy.Field()
    link = scrapy.Field()
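
A spider callback would typically instantiate NewYorkerItem, fill its four fields, and yield it. The sketch below is one assumption of how that could look (the spider class, start URL, and CSS selectors are hypothetical; only NewYorkerItem comes from the example above):

import scrapy

from newyorker.items import NewYorkerItem  # import path inferred from the project layout


class ArticleSpider(scrapy.Spider):
    name = "newyorker_articles"
    start_urls = ["https://www.newyorker.com/"]  # placeholder start page

    def parse(self, response):
        item = NewYorkerItem()
        # Illustrative selectors, not taken from the original project.
        item["title"] = response.css("h1::text").get()
        item["author"] = response.css('a[rel="author"]::text').get()
        item["text"] = " ".join(response.css("p::text").getall())
        item["link"] = response.url
        yield item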

github Grois / Python-Video / video / items.py
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class VideoItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    # name
    name = scrapy.Field()
    # one-line description
    short_desc = scrapy.Field()
    # rating
    score = scrapy.Field()
    # leading actors
    stars = scrapy.Field()
    # play count
    hot = scrapy.Field()
    # playback URL
    play_url = scrapy.Field()
    # image
    img = scrapy.Field()

github AllenTom / BangumiSpider / bangumi / items.py
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class BangumiItem(scrapy.Item):
    # define the fields for your model here like:
    # name = scrapy.Field()
    pass


class BangumiIdItem(scrapy.Item):
    bangumi_id = scrapy.Field()
    bangumi_type = scrapy.Field()
    bangumi_name = scrapy.Field()
    create = scrapy.Field()
    update = scrapy.Field()


class BangumiIdListItem(scrapy.Item):
    bangumi_data = scrapy.Field()


class BangumiBookIDsItem(scrapy.Item):
    bangumi_id_set = scrapy.Field()

github Pelhans / Z_knowledge_graph / ie / craw / craw_all_baidu / baidu_baike / items.py
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class BaiduBaikeItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    title = scrapy.Field()
    title_id = scrapy.Field()
    abstract = scrapy.Field()
    infobox = scrapy.Field()
    subject = scrapy.Field()
    disambi = scrapy.Field()
    redirect = scrapy.Field()
    curLink = scrapy.Field()
    interPic = scrapy.Field()
    interLink = scrapy.Field()
    exterLink = scrapy.Field()
    relateLemma = scrapy.Field()
    all_text = scrapy.Field()

github cuckootan / WeiboSpider / WeiboSpider / items.py
    # All comments on each post.
    comment_list = scrapy.Field()
    size = scrapy.Field()
    # Crawl date (year-month-day).
    crawl_date = scrapy.Field()

class ForwardItem(scrapy.Item):
    user_id = scrapy.Field()
    post_id = scrapy.Field()
    # All forwards (reposts) of each post.
    forward_list = scrapy.Field()
    size = scrapy.Field()
    # Crawl date (year-month-day).
    crawl_date = scrapy.Field()

class ThumbupItem(scrapy.Item):
    user_id = scrapy.Field()
    post_id = scrapy.Field()
    # All likes on each post.
    thumbup_list = scrapy.Field()
    size = scrapy.Field()
    # Crawl date (year-month-day).
    crawl_date = scrapy.Field()

github roliygu / CNKICrawler / cnki / paper_detail.py
pass


class KeyWordLink(AbstractLink):
    pass


class TutorLink(AbstractLink):
    pass


class OrganizationLink(AbstractLink):
    pass


class PaperDetail(scrapy.Item):
    @staticmethod
    def new_instance(url, title, authors, organizations, abstract, page_num, size,
                     catalog=None, tutors=None, doi=None, keywords=None, download_num=None):
        res = PaperDetail()
        res["url"] = url
        res["title"] = title
        res["authors"] = authors
        res["organizations"] = organizations
        res["abstract"] = abstract
        res["tutors"] = tutors
        res["catalog"] = catalog
        res["page_num"] = page_num
        res["size"] = size
        res["doi"] = doi
        res["keywords"] = keywords
        res["download_num"] = download_num

github Dy1aNT / Exploit-DB-Spider / EDBSpider / items.py
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class EdbspiderItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    id = scrapy.Field()
    title = scrapy.Field()
    author = scrapy.Field()
    date = scrapy.Field()
    type = scrapy.Field()
    platform = scrapy.Field()
    # category = scrapy.Field()


class DownloadItem(scrapy.Item):
    files = scrapy.Field()
    file_urls = scrapy.Field()
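
The file_urls and files field names used by DownloadItem are the defaults that Scrapy’s built-in FilesPipeline looks for: the pipeline downloads every URL listed in file_urls and writes the results into files. A minimal settings sketch to enable it (the storage path is an assumption):

# settings.py
ITEM_PIPELINES = {
    "scrapy.pipelines.files.FilesPipeline": 1,
}
FILES_STORE = "/path/to/downloaded/files"  # assumed local storage directory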

github SylvanasSun / FishFishJump / fish_core / scrapy / items.py
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class CommonItem(scrapy.Item):
    title = scrapy.Field()
    description = scrapy.Field()
    keywords = scrapy.Field()
    p_texts = scrapy.Field()  # Text content of each <p> tag
    url = scrapy.Field()
    crawled_timestamp = scrapy.Field()  # Timestamp of when the current page was crawled
    links = scrapy.Field()
    links_text = scrapy.Field()  # Text associated with each link
    simhash = scrapy.Field()  # Simhash code, based on title, description, keywords, p_texts and links_text

github kongtrio / hupu_spider / hupu_spider / items.py
import scrapy


class HupuPost(scrapy.Item):
    # define the fields for your item here like:
    id = scrapy.Field()
    title = scrapy.Field()
    url = scrapy.Field()
    author = scrapy.Field()
    post_time = scrapy.Field()
    view_count = scrapy.Field()
    reply_count = scrapy.Field()
    content = scrapy.Field()
    type = scrapy.Field()


class HupuPostReply(scrapy.Item):
    hupu_reply_id = scrapy.Field()
    author = scrapy.Field()
    hupu_post_id = scrapy.Field()
    reply_time = scrapy.Field()
    like_count = scrapy.Field()
    floor_num = scrapy.Field()
    content = scrapy.Field()


class HupuImageItem(scrapy.Item):
    image_urls = scrapy.Field()  # URLs of the images
    images = scrapy.Field()
    image_paths = scrapy.Field()
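
In the same way, image_urls and images are the default field names of Scrapy’s built-in ImagesPipeline, so HupuImageItem plugs straight into it (image_paths is an extra field the project presumably fills itself). A minimal settings sketch to enable the pipeline (the storage path is an assumption):

# settings.py
ITEM_PIPELINES = {
    "scrapy.pipelines.images.ImagesPipeline": 1,
}
IMAGES_STORE = "/path/to/downloaded/images"  # assumed local storage directory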

github marlesson / scrapy_tecnoblog / noticias / items.py
# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class NoticiasItem(scrapy.Item):
    title = scrapy.Field()
    author = scrapy.Field()
    text = scrapy.Field()
    link = scrapy.Field()