How to use the scrapy.Field class in Scrapy

To help you get started, we’ve selected a few scrapy.Field examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github luisramirez-m / mercadolibre-scrapy / mercado / items.py View on Github external
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class MercadoItem(scrapy.Item):
    """Scrapy item whose fields are grouped into product info and images.

    Every attribute is a bare ``scrapy.Field()`` with no metadata, so no
    serializer or processor is configured at the item level. Field names
    are in Spanish.
    """
    # define the fields for your item here like:
    # name = scrapy.Field()

    # product info
    titulo = scrapy.Field()
    modelo = scrapy.Field()
    marca = scrapy.Field()
    tecnologia = scrapy.Field()
    tipo = scrapy.Field()
    precio = scrapy.Field()
    condicion = scrapy.Field()
    envio = scrapy.Field()
    ubicacion = scrapy.Field()
    opiniones = scrapy.Field()

    # images
    # NOTE(review): image_urls / images match the default field names used by
    # Scrapy's ImagesPipeline — confirm the pipeline is enabled in settings.
    image_urls = scrapy.Field()
    images = scrapy.Field()
    image_name = scrapy.Field()


    # store or seller info
github mattdennewitz / baseball-brooks-pitch-importer / brooks / brooks / items.py View on Github external
import scrapy


class Pitch(scrapy.Item):
    """Scrapy item declaring one bare ``scrapy.Field()`` per attribute below.

    No field carries metadata (serializers, processors, defaults).
    NOTE(review): the field names presumably mirror column names of the
    upstream data source — confirm against the spider that fills them.
    """
    date_stamp = scrapy.Field()
    park_sv_id = scrapy.Field()
    play_guid = scrapy.Field()
    ab_total = scrapy.Field()
    ab_count = scrapy.Field()
    pitcher_id = scrapy.Field()
    batter_id = scrapy.Field()
    ab_id = scrapy.Field()
    des = scrapy.Field()
    # NOTE: `type` and `id` reuse builtin names; inside this class body they
    # shadow the builtins from this point on.
    type = scrapy.Field()
    id = scrapy.Field()
    sz_top = scrapy.Field()
    sz_bot = scrapy.Field()
    pfx_xdatafile = scrapy.Field()
    pfx_zdatafile = scrapy.Field()
    mlbam_pitch_name = scrapy.Field()
    zone_location = scrapy.Field()
    pitch_con = scrapy.Field()
    stand = scrapy.Field()
    strikes = scrapy.Field()
    balls = scrapy.Field()
    p_throws = scrapy.Field()
    gid = scrapy.Field()
    pdes = scrapy.Field()
    spin = scrapy.Field()
    norm_ht = scrapy.Field()
    inning = scrapy.Field()
github code4romania / czl-scrape / sgg / sgg / sgg / spiders / sgg_spider.py View on Github external
import json 
import hashlib 

base_url = "http://www.sgg.ro"



class Item(scrapy.Item):
    """Scrapy item declaring the fields below, each a bare ``scrapy.Field()``.

    NOTE(review): ``identifier`` is presumably the string built by
    ``identify()`` in this module — confirm in the spider's parse code.
    """
    identifier = scrapy.Field()
    title = scrapy.Field()
    # NOTE: `type` reuses a builtin name; it shadows the builtin for the rest
    # of this class body.
    type = scrapy.Field()
    institution = scrapy.Field()
    date = scrapy.Field()
    description = scrapy.Field()
    feedback_days = scrapy.Field()
    contact = scrapy.Field()
    documents = scrapy.Field()
    
def xtract(obj, sel):
    """Return the first XPath match of *sel* on *obj* as a single line.

    The first result of ``obj.xpath(sel).extract_first()`` is split into
    lines, each line is stripped of surrounding whitespace, and the pieces
    are re-joined with a single space. When there is no match (or the match
    is empty) an empty string is returned.
    """
    first_match = obj.xpath(sel).extract_first()

    # Nothing matched (None) or the match was empty: normalise to "".
    if not first_match:
        return ""

    return " ".join(line.strip() for line in first_match.splitlines())

def identify(institution, titlu):
    """Build an identifier string from a title hash and the institution.

    Returns ``"<md5 hex digest of titlu encoded as UTF-8> : <institution>"``.
    """
    title_digest = hashlib.md5(titlu.encode('utf-8')).hexdigest()
    return title_digest + " : " + institution

class SggSpider(scrapy.Spider):
    name = "sgg_spider"
github Shinichi-Nakagawa / scrapy-sample-baseball / baseball / baseball / items.py View on Github external
ab = Field()        # 打数
    r = Field()         # 得点
    h = Field()         # 安打
    double = Field()    # 二塁打
    triple = Field()    # 三塁打
    hr = Field()        # 本塁打
    tb = Field()        # 塁打
    rbi = Field()       # 打点
    sb = Field()        # 盗塁
    cs = Field()        # 盗塁死
    sh = Field()        # 犠打(バント)
    sf = Field()        # 犠飛(犠牲フライ)
    bb = Field()        # 四球
    ibb = Field()       # 故意四球(敬遠)
    hbp = Field()       # 死球(デットボール)
    so = Field()        # 三振
    dp = Field()        # 併殺
    ba = Field()        # 打率
    slg = Field()       # 長打率
    obp = Field()       # 出塁率


class PitcherItem(Item):
    """Item declaring the pitcher fields below, each a bare ``Field()``."""
    year = Field()      # season (year)
    team = Field()      # team
    name = Field()      # name
    throw = Field()     # throws right-handed or left-handed
    games = Field()     # appearances (games pitched)
    w = Field()         # wins
    l = Field()         # losses
    sv = Field()        # saves
    hld = Field()       # holds
github inspirehep / hepcrawl / hepcrawl / items.py View on Github external
thesis = scrapy.Field()
    """Thesis information

    Example:
        ::

            [{
                'date': '',
                'defense_date': '',
                'institutions': [],
                'degree_type': '',
            }]
    """

    urls = scrapy.Field()
    """URLs to splash page.

    Example:
        ::

            ['http://hdl.handle.net/1885/10005']
    """

    external_system_numbers = scrapy.Field()
    """External System Numbers

    Example:
        ::

            [
                {
github mattdennewitz / baseball-brooks-pitch-importer / brooks / brooks / items.py View on Github external
strikes = scrapy.Field()
    balls = scrapy.Field()
    p_throws = scrapy.Field()
    gid = scrapy.Field()
    pdes = scrapy.Field()
    spin = scrapy.Field()
    norm_ht = scrapy.Field()
    inning = scrapy.Field()
    pitcher_team = scrapy.Field()
    tstart = scrapy.Field()
    vystart = scrapy.Field()
    ftime = scrapy.Field()
    pfx_x = scrapy.Field()
    pfx_z = scrapy.Field()
    uncorrected_pfx_x = scrapy.Field()
    uncorrected_pfx_z = scrapy.Field()
    x0 = scrapy.Field()
    y0 = scrapy.Field()
    z0 = scrapy.Field()
    vx0 = scrapy.Field()
    vy0 = scrapy.Field()
    vz0 = scrapy.Field()
    ax = scrapy.Field()
    ay = scrapy.Field()
    az = scrapy.Field()
    start_speed = scrapy.Field()
    px = scrapy.Field()
    pz = scrapy.Field()
    pxold = scrapy.Field()
    pzold = scrapy.Field()
    tm_spin = scrapy.Field()
    sb = scrapy.Field()
github Germey / Zhihu / zhihuuser / items.py View on Github external
url_token = Field()
    gender = Field()
    cover_url = Field()
    type = Field()
    badge = Field()

    answer_count = Field()
    articles_count = Field()
    commercial_question_count = Field()
    favorite_count = Field()
    favorited_count = Field()
    follower_count = Field()
    following_columns_count = Field()
    following_count = Field()
    pins_count = Field()
    question_count = Field()
    thank_from_count = Field()
    thank_to_count = Field()
    thanked_count = Field()
    vote_from_count = Field()
    vote_to_count = Field()
    voteup_count = Field()
    following_favlists_count = Field()
    following_question_count = Field()
    following_topic_count = Field()
    marked_answers_count = Field()
    mutual_followees_count = Field()
    hosted_live_count = Field()
    participated_live_count = Field()

    locations = Field()
    educations = Field()
github Dengqlbq / ZhiHuSpider / zhihu / zhihu / items.py View on Github external
class ZhihuItem(scrapy.Item):
    """Scrapy item with a single ``title`` field."""
    # define the fields for your item here like:
    # name = scrapy.Field()
    title = scrapy.Field()


class ZhihuQuestionItem(scrapy.Item):
    """Scrapy item declaring the question fields below, all bare ``scrapy.Field()``."""

    name = scrapy.Field()
    url = scrapy.Field()
    keywords = scrapy.Field()
    answer_count = scrapy.Field()
    comment_count = scrapy.Field()
    # NOTE(review): `flower_count` may be a misspelling of "follower_count";
    # renaming it would change the item key, so confirm with the spider and
    # any pipelines before touching it.
    flower_count = scrapy.Field()
    date_created = scrapy.Field()


class ZhihuAnswerItem(scrapy.Item):
    """Scrapy item declaring the answer fields below, all bare ``scrapy.Field()``.

    NOTE(review): ``question_id`` presumably links an answer back to its
    question item — confirm how the spider populates it.
    """

    question_id = scrapy.Field()
    author = scrapy.Field()
    ans_url = scrapy.Field()
    comment_count = scrapy.Field()
    upvote_count = scrapy.Field()
    excerpt = scrapy.Field()
github ZhiqiKou / Scrapy_notes / ScrapyTest08 / dytt_redis_slaver / dytt_redis_slaver / items.py View on Github external
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy


class DyttRedisSlaverItem(scrapy.Item):
    """Scrapy item describing one film; every field is a bare ``scrapy.Field()``."""
    # film title
    name = scrapy.Field()
    # year
    year = scrapy.Field()
    # language
    language = scrapy.Field()
    # category / genre
    movie_type = scrapy.Field()
    # release date
    release_date = scrapy.Field()
    # rating
    score = scrapy.Field()
    # file size
    file_size = scrapy.Field()
    # running time
    film_time = scrapy.Field()
    # synopsis
    introduction = scrapy.Field()
    # poster
    posters = scrapy.Field()
    # download link
    download_link = scrapy.Field()
github nannantingyu / spider-scrapy / crawl / items.py View on Github external
description = scrapy.Field()

class CrawlArticleDetailItem(scrapy.Item):
    """Scrapy item with ``source_id``, ``body`` and ``keywords`` fields.

    NOTE(review): ``source_id`` presumably ties the detail record to its
    source article — confirm in the spider/pipeline.
    """
    source_id = scrapy.Field()
    body = scrapy.Field()
    keywords = scrapy.Field()

class CrawlHotkey(scrapy.Item):
    """Scrapy item with ``time``, ``keyword``, ``order`` and ``source_id`` fields.

    All fields are bare ``scrapy.Field()`` declarations without metadata.
    """
    time = scrapy.Field()
    keyword = scrapy.Field()
    order = scrapy.Field()
    source_id = scrapy.Field()

class CrawlWeibo(scrapy.Item):
    """Scrapy item declaring the post, author and source fields below.

    All fields are bare ``scrapy.Field()`` declarations without metadata.
    NOTE(review): ``images`` is also the default result field name of
    Scrapy's ImagesPipeline — confirm whether that pipeline fills it.
    """
    pub_time = scrapy.Field()
    content = scrapy.Field()
    author_name = scrapy.Field()
    author_link = scrapy.Field()
    author_img = scrapy.Field()
    source_url = scrapy.Field()
    source_id = scrapy.Field()
    images = scrapy.Field()