# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# http://doc.scrapy.org/en/latest/topics/items.html
from scrapy.item import Item, Field
class ZhihuPeopleItem(Item):
    """A scraped Zhihu user profile: identity, bio text, and activity counters."""

    # Identity and profile text
    id = Field()
    name = Field()
    sign = Field()
    location = Field()
    business = Field()
    employment = Field()
    position = Field()
    education = Field()
    education_extra = Field()
    description = Field()

    # Reputation / activity counters shown on the profile page
    agree = Field()
    thanks = Field()
    asks = Field()
    answers = Field()
    posts = Field()
    collections = Field()
    logs = Field()
    followees = Field()
    followers = Field()
    follow_topics = Field()
from scrapy.item import Item, Field
from scrapy.contrib.loader import XPathItemLoader
from scrapy.contrib.exporter import BaseItemExporter
from scrapy.contrib.loader.processor import MapCompose, TakeFirst
class Proxy(Item):
    """A single proxy server endpoint."""

    address = Field()  # host or IP string
    port = Field()     # integer port (see ProxyItemLoader's int input processor)
class ProxyItemLoader(XPathItemLoader):
    """Loader for Proxy items: strips address text and coerces port to int.

    NOTE(review): uses Python 2 ``unicode`` and the legacy
    ``scrapy.contrib`` loader — this section predates Scrapy 1.x.
    """

    # Coerce the scraped port string to an integer.
    port_in = MapCompose(int)
    # Normalize the address: ensure unicode, then trim whitespace.
    address_in = MapCompose(unicode, unicode.strip)
    # Collapse each field's value list down to its first element.
    default_output_processor = TakeFirst()
class IPPortItemExporter(BaseItemExporter):
    """Writes proxy items to a file as plain ``address:port`` lines."""

    def __init__(self, file, **kwargs):
        # Let the base exporter consume its known options from kwargs,
        # ignoring any it does not recognize (dont_fail=True).
        self._configure(kwargs, dont_fail=True)
        self.file = file

    def export_item(self, item):
        # Item supports __getitem__, so the format spec can index into it.
        line = '{item[address]}:{item[port]}\n'.format(item=item)
        return self.file.write(line)
# NOTE(review): these three module-level Field() assignments appear to be
# orphaned remnants of an item class whose header was lost when this file
# was assembled — they do nothing useful at module scope. Verify against
# the original source and either re-attach them to their class or remove.
bottomline = Field()
duty = Field()
xxx = Field()
class SisForumListItem(Item):
    """One thread row scraped from a SIS forum listing page."""

    content = Field()         # raw thread content, HTML included
    title = Field()
    thread_type = Field()
    author = Field()
    post_time = Field()
    link = Field()
    star = Field()
    comment = Field()
    view = Field()
    size = Field()
    video_type = Field()
    last_post_time = Field()
# Authorship metadata for the Naver movie-ranking scraper section below.
__author__ = 'carpedm20'
__date__ = '2014.07.25'
from scrapy.spider import BaseSpider
from scrapy.selector import HtmlXPathSelector
# http://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cnt&date=20050207&tg=0
from scrapy.item import Item, Field
class Movie(Item):
    """A single entry from the daily Naver movie ranking chart."""

    name = Field()  # movie title
    url = Field()   # link to the movie's detail page
    rank = Field()  # position in the chart for that day
    date = Field()  # chart date
#tgs = range(20)
#tgs.remove(9)
def make_urls(tg):
    """Build the list of daily Naver movie-ranking URLs for genre ``tg``.

    One URL per day from 2005-02-07 (earliest chart) through today,
    with the date encoded as YYYYMMDD.

    :param tg: genre/tag id interpolated into the ``tg=`` query parameter.
    :returns: list of URL strings, in chronological order.
    """
    url = "http://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=cnt&date=%s&tg=%s"
    urls = []
    from datetime import date, timedelta
    current_date = date(2005, 2, 7)
    end_date = date.today()
    delta = timedelta(days=1)
    while current_date <= end_date:
        urls.append(url % (current_date.strftime("%Y%m%d"), tg))
        # BUG FIX: the original never advanced current_date, so this
        # loop ran forever; step one day per iteration and return urls.
        current_date += delta
    return urls
from scrapy.item import Item, Field
class AndroidAppItem(Item):
    """Metadata for one Android app scraped from its store listing."""

    name = Field()
    category = Field()
    company = Field()
    email = Field()              # developer contact address
    developer_website = Field()
    min_downloads = Field()      # lower bound of the store's download bracket
    max_downloads = Field()      # upper bound of the store's download bracket
    store_url = Field()
    is_free = Field()
# NOTE(review): the fields below (income statement, taxes, balance sheet,
# cash flow) clearly belong to a financial-statements item — presumably a
# sibling of PriceItem/SymbolItem below — whose ``class`` header was lost
# when this file was assembled. Verify against the original source and
# restore the enclosing class definition.
fiscal_year = Field()
end_date = Field()
revenues = Field()
investment_revenues = Field()
op_income = Field()
net_income = Field()
gross_profit = Field()
interest_expense = Field()
research_and_dev_expense = Field()
eps_basic = Field()
eps_diluted = Field()
dividend = Field()
# Taxes
tax_expense = Field()
net_taxes_paid = Field()
# Balance sheet stuffs
assets = Field()
cur_assets = Field()
acts_pay_current = Field()
acts_receive_current = Field()
acts_receive_noncurrent = Field()
total_liabilities = Field()
total_liabilities_equity = Field()
shares_outstanding = Field()
shares_outstanding_diluted = Field()
common_stock_outstanding = Field()
# Cash flow from operating, investing, and financing
cash_flow_op = Field()
cash_flow_inv = Field()
cash_flow_fin = Field()
class PriceItem(Item):
    """Daily OHLCV price bar for one traded symbol."""

    symbol = Field()     # trading ticker
    date = Field()       # trading day, YYYY-MM-DD
    open = Field()
    close = Field()
    high = Field()
    low = Field()
    adj_close = Field()  # close adjusted for splits/dividends
    volume = Field()
class SymbolItem(Item):
    """A ticker symbol and its company name."""

    symbol = Field()
    name = Field()
import re
import time
try:
import urlparse
except ImportError:
import urllib.parse as urlparse
from scrapy import item
from scrapy import linkextractors
from scrapy import spiders
class SitemapItem(item.Item):
    """One <url> entry of an XML sitemap (loc, lastmod, priority, changefreq)."""

    loc = item.Field()         # page URL
    lastmod = item.Field()     # last-modification timestamp
    priority = item.Field()    # crawl priority hint, 0.0-1.0
    changefreq = item.Field()  # expected change frequency (daily, weekly, ...)
class SitemapSpider(spiders.CrawlSpider):
name = 'sitemap'
MAINT_SERIES = [
'newton',
'ocata',
'pike',
'queens',
'rocky',
'stein',
]