Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""
Test MongoPipeline
"""
from bson.son import SON
from pymongo.results import InsertOneResult
from scrapy import Spider
from scrapy.item import Field, Item
from scrapy.settings import Settings
from twisted.internet.defer import inlineCallbacks
from twisted.trial.unittest import TestCase
from txmongo.filter import ASCENDING, DESCENDING
from scrapy_pipelines.pipelines.mongo import MongoPipeline, get_args
from scrapy_pipelines.settings import default_settings
class TempItem(Item):
    """
    An item class that exists only for testing; it declares fields ``a`` and ``b``.
    """

    a = Field()
    b = Field()
class TestGetArgs(TestCase):
"""
Test the functions in MongoPipeline
"""
def test_get_args(self):
"""
date = Field()
price = Field()
image = Field()
link = Field()
name = Field()
size = Field()
class SneakerPoliticsItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class UrbanIndustryItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class UrbanOutfittersItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class LuisaItem(Item):
class ZhihuFollowersItem(Item):
    """
    Scrapy item declaring the fields ``_id``, ``username`` and ``followers``.
    """

    _id = Field()
    username = Field()
    followers = Field()
class DoubanbookItem(Item):
    """
    Scrapy item declaring the fields ``title``, ``link``, ``desc`` and ``num``.
    """

    title = Field()
    link = Field()
    desc = Field()
    num = Field()
class DoubanSubjectItem(Item):
    """
    Scrapy item declaring eight fields: title, link, info, rate, votes,
    content_intro, author_intro and tags.
    """

    title = Field()
    link = Field()
    info = Field()
    rate = Field()
    votes = Field()
    content_intro = Field()
    author_intro = Field()
    tags = Field()
return ('', '')
else:
return ('__isnull', True)
else:
return None
value = get_field_value(item, field, value, pipeline, spider)
if isinstance(field, AddressField):
return ('', value)
elif isinstance(field, ManyToManyField):
return ('__contains', value)
elif isinstance(field, FileField):
return ('__contains', os.path.basename(value))
return ('', value)
class DjangoItem(Item):
__metaclass__ = DjangoItemMeta
django_model = None
# We use the "id" field to form links between models for related fields.
id = Field(input_processor=MapCompose(RemoveEntities(), Strip()), output_processor=TakeFirst())
scrape_url = Field(output_processor=TakeFirst())
def save(self, pipeline, spider):
# Convert all our values as needed. Mostly for addresses, dammit.
for field in self._model_fields:
value = self.get(field.name)
if value not in ['', None, []]:
self[field.name] = get_field_value(self, field, value, pipeline, spider)
# Create a search filter, beginning by adding all my unique fields.
fltr = {}
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/topics/items.html
from scrapy.item import Item, Field
class DianpingShopItem(Item):
    """
    Scrapy item declaring the fields collected for a shop entry.

    Fields: link_url, name, n_rating, rating, taste_rating, service_rating,
    atmosphere_rating, address, tel, category, city, avg_cost, bread_crumb.
    Every field is a bare ``Field()`` with no metadata attached.
    """

    link_url = Field()
    name = Field()
    n_rating = Field()
    rating = Field()
    taste_rating = Field()
    service_rating = Field()
    # The original read ``atmosphere_rating = rating = Field()``, which
    # rebound ``rating`` and made both names share one Field object; each
    # field now owns its own Field instance.
    atmosphere_rating = Field()
    address = Field()
    tel = Field()
    category = Field()
    city = Field()
    avg_cost = Field()
    bread_crumb = Field()
def parse(self, response):
    """
    Build an item from the XPath expressions in ``response.meta['fields']``.

    For each ``name -> xpath`` pair whose XPath is truthy, a ``Field`` is
    registered on the item under that name and the XPath is handed to the
    loader; pairs with a falsy XPath are ignored. Returns whatever the
    loader's ``load_item()`` produces.
    """
    item = Item()
    loader = ItemLoader(item=item, response=response)
    # NOTE(review): ``item.fields`` may be a mapping shared at class level in
    # Scrapy, so fields registered here could outlive this call -- confirm.
    for field_name, field_xpath in response.meta['fields'].items():
        if not field_xpath:
            continue
        item.fields[field_name] = Field()
        loader.add_xpath(field_name, field_xpath)
    return loader.load_item()
date = Field()
price = Field()
image = Field()
link = Field()
name = Field()
size = Field()
class DefShopItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class OffSpringItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class SoleKitchenItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class DromeItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class StickABushItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class KongItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class SaveOurSoleItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class InflammableItem(Item):
# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html
from scrapy.item import Item, Field
class sinanewsItem(Item):
    """
    Scrapy item declaring the fields ``name``, ``content`` and ``url``.
    """

    name = Field()
    content = Field()
    url = Field()
date = Field()
price = Field()
image = Field()
link = Field()
name = Field()
size = Field()
class FootDistrictItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class SizeItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class YCMCItem(Item):
    """
    Scrapy item declaring the fields date, price, image, link, name and size.

    Every field is a bare ``Field()`` with no metadata attached.
    """

    date = Field()
    price = Field()
    image = Field()
    link = Field()
    name = Field()
    size = Field()
class CityItem(Item):