import time
import psycopg2
import logging
import urlparse

import requests

import queuelib
from queuelib import QueueService


class CategoryImporter(QueueService):
    CATEGORIFY_API = 'https://categorify.org/api'
    QUEUE_NAME = 'category'

    def setup_bindings(self):
        # Consume org and public URL events published on the org.blocked exchange.
        self.ch.queue_declare("category", durable=True, auto_delete=False)
        self.ch.queue_bind("category", "org.blocked", "url.org")
        self.ch.queue_bind("category", "org.blocked", "url.public")
        self.session = requests.session()

    def process_message(self, data):
        url = data['url']
        parsed_url = urlparse.urlparse(url)
        domain = parsed_url.netloc.lower()
        # Strip a leading "www." so lookups are keyed on the bare domain.
        if domain.startswith('www.'):
            domain = domain.split('.', 1)[-1]
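
    # Hypothetical continuation (not shown in this snippet): look the bare
    # domain up against the Categorify endpoint. The query parameter name
    # ('website') and the JSON reply field ('category') are assumptions.
    def lookup_category(self, domain):
        req = self.session.get(self.CATEGORIFY_API,
                               params={'website': domain}, timeout=30)
        req.raise_for_status()
        return req.json().get('category')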
import json

import requests
import bs4

import queuelib
from queuelib import QueueService

"""
This daemon listens on a dedicated queue for URLs to fetch, and extracts
metadata from the HTML. The metadata is saved to the site_description table.
"""


class MetadataGatherer(QueueService):
    QUEUE_NAME = 'metadata'

    def __init__(self):
        super(MetadataGatherer, self).__init__()
        self.count = 0
        self.headers = {'User-agent': self.config.get('useragent')}

    def save_description(self, url, data):
        c = self.conn.cursor()
        #c.execute("update urls set description = %s where url = %s", )
        # Store the full metadata blob as JSON against the URL's id.
        c.execute("""insert into site_description(urlid, created, description)
                     select urlID, now(), %s from urls where url = %s""",
                  [json.dumps(data), url])
        if data.get('title'):
            c.execute("update urls set title=%s where url = %s",
                      [data['title'], url])
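
    # Illustrative helper (the name is an assumption): the docstring says
    # this daemon extracts metadata from fetched HTML, and bs4 is imported
    # above, so a minimal sketch might pull the <title> text and the
    # description <meta> tag into the dict that save_description() stores.
    def extract_metadata(self, html):
        soup = bs4.BeautifulSoup(html, 'html.parser')
        data = {}
        if soup.title and soup.title.string:
            data['title'] = soup.title.string.strip()
        tag = soup.find('meta', attrs={'name': 'description'})
        if tag and tag.get('content'):
            data['description'] = tag['content'].strip()
        return data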
import hmac
import hashlib
import datetime
import logging


# The class header and constructor are inferred from context: CloudflareProbe
# below instantiates RequestSigner with a secret string from its config.
class RequestSigner(object):
    def __init__(self, secret):
        self.secret = secret.encode('utf8')

    def sign(self, *args):
        # Build a colon-delimited message, coercing non-string values to str.
        msg = ':'.join(
            [str(x) if not isinstance(x, (unicode, str)) else x for x in args])
        logging.debug("Using signature string: %s", msg)
        hm = hmac.new(self.secret, msg.encode('utf8'), hashlib.sha512)
        return hm.hexdigest()

    def get_signature(self, args, keys):
        # Sign the values of the selected keys, in the order given.
        return self.sign(*[args[x] for x in keys])

    def timestamp(self):
        return datetime.datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
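
# Usage sketch (secret and field names are illustrative): sign selected
# request fields together with a timestamp, in a fixed key order.
def _example_sign_request():
    signer = RequestSigner('example-secret')
    body = {'url': 'http://example.com', 'date': signer.timestamp()}
    body['signature'] = signer.get_signature(body, ['url', 'date'])
    return body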
class CloudflareProbe(QueueService):
    CLOUDFLARE_API = 'https://family.cloudflare-dns.com/dns-query'
    QUEUE_NAME = 'url.cloudflare_family.org'
    HEADERS = {
        'Accept': 'application/dns-json',
        'User-Agent': 'CloudflareProbe/1.0 (+https://www.blocked.org.uk)'
    }

    def __init__(self):
        super(CloudflareProbe, self).__init__()
        self.signer = RequestSigner(self.cfg.get('cf-probe', 'probe_secret'))
        self.network = self.cfg.get('cf-probe', 'network_name')

    def setup_bindings(self):
        self.session = requests.session()

    def connect_db(self):
        pass  # method body not included in this snippet
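
    # Sketch of a probe request (method name and blocked-answer convention
    # are assumptions): Cloudflare's JSON DoH endpoint takes 'name' and
    # 'type' query parameters, and the family resolver answers 0.0.0.0 for
    # domains it filters, so an all-zero A record is treated as blocked.
    def check_domain(self, domain):
        req = self.session.get(self.CLOUDFLARE_API,
                               params={'name': domain, 'type': 'A'},
                               headers=self.HEADERS, timeout=30)
        req.raise_for_status()
        answers = [a['data'] for a in req.json().get('Answer', [])
                   if a.get('type') == 1]
        return 'blocked' if answers and all(
            ip == '0.0.0.0' for ip in answers) else 'ok'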
import logging
import psycopg2
import resource
import urlparse
import subprocess

import queuelib
from queuelib import QueueService

"""
This daemon listens on a dedicated queue for URLs to fetch, and saves the
whois expiry date.
"""


class WhoisLookup(QueueService):
    AGE = 180
    QUEUE_NAME = 'whois'

    def __init__(self):
        super(WhoisLookup, self).__init__()
        self.count = 0

    def setup_bindings(self):
        # Consume org and public URL events published on the org.blocked exchange.
        self.ch.queue_declare(self.QUEUE_NAME, durable=True, auto_delete=False)
        self.ch.queue_bind(self.QUEUE_NAME, "org.blocked", "url.org")
        self.ch.queue_bind(self.QUEUE_NAME, "org.blocked", "url.public")
    def check_expiry_cache(self, url):
        """Returns True if cache is expired"""
        c = self.conn.cursor()
        c.execute("""select whois_expiry_last_checked