Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if line[-1:] == b'\r': line = line[:-1]
if self.state == u'STATUS': self.statusReceived(line); self.state = u'HEADER'
elif self.state == u'HEADER':
if not line or line[0] not in b' \t':
if self._partialHeader is not None:
_temp = b''.join(self._partialHeader).split(b':', 1)
name, value = _temp if len(_temp) == 2 else (_temp[0], b'')
self.headerReceived(name, value.strip())
if not line: self.allHeadersReceived()
else: self._partialHeader = [line]
else: self._partialHeader.append(line)
import twisted.web._newclient
twisted.web._newclient.HTTPParser.lineReceived = lineReceived
# Patch below: the idna library is overly strict, so hostnames that contain
# underscores fail validation with an exception; wrap its label check so that
# this error is ignored.
import idna.core
_check_label_bak = idna.core.check_label


def check_label(label):
    """Run idna's original ``check_label`` but ignore ``InvalidCodepoint``.

    Returns whatever the original check returns. When the original raises
    ``idna.core.InvalidCodepoint`` the error is swallowed and ``None`` is
    returned instead; any other exception propagates unchanged.
    """
    try:
        outcome = _check_label_bak(label)
    except idna.core.InvalidCodepoint:
        # Deliberate best-effort: treat the rejected label as acceptable.
        return None
    return outcome


idna.core.check_label = check_label
import json
import types
import traceback
from scrapy import Request
class VSpider(RedisSpider):
name = 'v'
def parse(self, response):
_plusmeta = response._plusmeta.copy()
taskid = _plusmeta.pop('taskid')
spider_name = _plusmeta.pop('spider_name')
if self._partialHeader is not None:
_temp = b''.join(self._partialHeader).split(b':', 1)
name, value = _temp if len(_temp) == 2 else (_temp[0], b'')
self.headerReceived(name, value.strip())
if not line: self.allHeadersReceived()
else: self._partialHeader = [line]
else: self._partialHeader.append(line)
import twisted.web._newclient
twisted.web._newclient.HTTPParser.lineReceived = lineReceived
# Patch below: the idna library is overly strict, so hostnames that contain
# underscores fail validation with an exception; wrap its label check so that
# this error is ignored.
import idna.core
_check_label_bak = idna.core.check_label


def check_label(label):
    """Run idna's original ``check_label`` but ignore ``InvalidCodepoint``.

    Returns whatever the original check returns. When the original raises
    ``idna.core.InvalidCodepoint`` the error is swallowed and ``None`` is
    returned instead; any other exception propagates unchanged.
    """
    try:
        outcome = _check_label_bak(label)
    except idna.core.InvalidCodepoint:
        # Deliberate best-effort: treat the rejected label as acceptable.
        return None
    return outcome


idna.core.check_label = check_label
import json
import types
import traceback
from scrapy import Request
class VSpider(RedisSpider):
name = 'v'
def parse(self, response):
_plusmeta = response._plusmeta.copy()
taskid = _plusmeta.pop('taskid')
spider_name = _plusmeta.pop('spider_name')
module_name = _plusmeta.pop('module_name')
__callerr__ = _plusmeta.pop('__callerr__')
# 在传递脚本的 start_requests 执行时会执行一次将脚本加载成对象放入
seen = set()
filtered = []
for d in self.domains:
if d['domain-name'] in seen:
continue
seen.add(d['domain-name'])
if self.__validate_domain(d['domain-name']):
filtered.append(d)
self.domains = filtered
idna.core.check_label = old_func
def __filter_domains(self):
    """Deduplicate ``self.domains`` by name and drop entries failing validation.

    Walks ``self.domains`` in order, keeps only the first entry seen for each
    ``'domain-name'`` value, and of those keeps the ones for which
    ``self.__validate_domain(name)`` is truthy. The result replaces
    ``self.domains``.

    While filtering, ``idna.core.check_label`` is swapped for a no-op and is
    always put back afterwards, even if validation raises.
    """
    # IDNA encoding's detailed check makes this 4x slower, and we validate
    # all requests that just query a domain later on.
    old_func = idna.core.check_label
    idna.core.check_label = lambda l: None
    try:
        seen = set()
        filtered = []
        for d in self.domains:
            name = d['domain-name']
            if name in seen:
                continue
            # Record the name before validating so that an invalid name is
            # only validated once, no matter how often it repeats.
            seen.add(name)
            if self.__validate_domain(name):
                filtered.append(d)
        self.domains = filtered
    finally:
        # Restore the real check; without this the no-op would stay
        # installed for every later idna user in the process.
        idna.core.check_label = old_func
def __filter_domains(self):
    """Deduplicate ``self.domains`` by name and drop entries failing validation.

    Walks ``self.domains`` in order, keeps only the first entry seen for each
    ``'domain-name'`` value, and of those keeps the ones for which
    ``self.__validate_domain(name)`` is truthy. The result replaces
    ``self.domains``.

    While filtering, ``idna.core.check_label`` is swapped for a no-op and is
    always put back afterwards, even if validation raises.
    """
    # IDNA encoding's detailed check makes this 4x slower, and we validate
    # all requests that just query a domain later on.
    old_func = idna.core.check_label
    idna.core.check_label = lambda l: None
    try:
        seen = set()
        filtered = []
        for d in self.domains:
            name = d['domain-name']
            if name in seen:
                continue
            # Record the name before validating so that an invalid name is
            # only validated once, no matter how often it repeats.
            seen.add(name)
            if self.__validate_domain(name):
                filtered.append(d)
        self.domains = filtered
    finally:
        # Restore in ``finally`` so an exception from validation (or a
        # missing 'domain-name' key) cannot leave the no-op installed.
        idna.core.check_label = old_func