How to use the dataset.connect function in dataset

To help you get started, we’ve selected a few dataset.connect examples based on popular ways the library is used in public projects.
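
Before digging into the project excerpts below, here is a minimal, self-contained sketch of the typical dataset.connect workflow: connect with a SQLAlchemy-style URL, grab a table by name, then insert and query rows. The in-memory SQLite URL and the table and column names are only illustrative.

import dataset

# Connect using a SQLAlchemy-style URL (an in-memory SQLite database here)
db = dataset.connect('sqlite:///:memory:')

# Tables are created lazily the first time they are referenced
table = db['users']

# insert() returns the primary key of the new row
pk = table.insert(dict(name='Alice', email='alice@example.com'))

# find_one() fetches a single row; find() and all() return iterators
row = table.find_one(id=pk)
print(row['name'], row['email'])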


github lepinkainen / pyfibot / pyfibot / modules / available / module_rss.py
def init(bot, testing=False):
    """ Initialize updater """
    global DATABASE
    global config
    global botref
    global updater
    global logger

    if testing:
        DATABASE = dataset.connect("sqlite:///:memory:")
    else:
        DATABASE = dataset.connect("sqlite:///databases/rss.db")

    logger.info("RSS module initialized")
    botref = bot
    config = bot.config.get("rss", {})
    finalize()
    # As there's no signal if this is a rehash or restart
    # update feeds in 30 seconds
    updater = callLater(30, update_feeds)
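
The pyfibot snippet above shows a common pattern: pass dataset.connect an in-memory SQLite URL under test and a file-backed URL otherwise. Because dataset sits on top of SQLAlchemy, the same call accepts other backend URLs as well; the hosts, credentials, and database names below are placeholders, not values from the project.

import dataset

# File-backed SQLite: a relative path follows the three slashes
db = dataset.connect('sqlite:///databases/rss.db')

# Other SQLAlchemy backends work too, provided the matching driver is installed
# db = dataset.connect('postgresql://user:password@localhost:5432/mydb')
# db = dataset.connect('mysql://user:password@localhost/mydb')
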
github AIworx-Labs / chocolate / chocolate / connection / sqlite.py
def insert_complementary(self, document):
        """Insert a new document (row) in the complementary information table.
        """
        gc.collect()
        db = dataset.connect(self.url)
        return db[self.complementary_table_name].insert(document)
github ghostwords / chameleon-crawler / crawler / collector.py
def collect(crawl_id, result_queue, log):
    db = dataset.connect(DATABASE_URL)

    while True:
        if result_queue.empty():
            sleep(0.01)
            continue

        result = result_queue.get()

        if result is None:
            break

        crawl_url, error, result = result

        if not result:
            with db:
                db['result'].insert(dict(
github carlosperate / LightUpPi-Alarm / LightUpAlarm / AlarmDb.py
def __connect_settings(self):
        """ Connecting to a SQLite database table 'settings'. """
        settings_table = dataset.connect(self.db_file)['settings']
        return settings_table
github alephdata / opensanctions / sources / zz_wikipedia / scrape.py
import dataset
import re
import mwclient

site = mwclient.Site('en.wikipedia.org')
disam = re.compile('\(.*\)$')
engine = dataset.connect('sqlite:///data.sqlite')
pages_table = engine['data']
categories_table = engine['categories']

COLLECTIONS = {
    'uganda': ['Ugandan_politicians', 'Presidents_of_Uganda', 'Ugandan_rebels',
               'Speakers_of_the_Parliament_of_Uganda', 'National_Resistance_Movement_politicians',
               'Prime_Ministers_of_Uganda', 'Government_ministers_of_Uganda',
               'Political_office-holders_in_Uganda'],
    'mozambique': ['Presidents_of_Mozambique', 'Heads_of_state_of_Mozambique',
                   'FRELIMO_politicians', 'Mozambican_politicians_by_party',
                   'Government_ministers_of_Mozambique'],
    'southafrica': ['Members_of_the_National_Assembly_of_South_Africa',
                    'South_African_revolutionaries', 'South_African_people_by_political_party',
                    'South_African_people_by_political_orientation']
}
github abelsonlive / scrape-the-gibson / 04-regex.py
import os
import re

import dataset

#############
# Thready is a very simple code snippet: 
#   http://github.com/pudo/thready
#############
from thready import threaded

# A list of missed connections in New York
BASE_URL = 'http://newyork.craigslist.org/'

# a directory for caching files we've already downloaded
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')

# connect to our database
database = dataset.connect('sqlite:///missed_connections.db')

# get a table
table = database['missed_connections']

# a regular expression to extract metadata from the subject
re_subject = re.compile(r"(.*) - ([a-z]{1,2}4[a-z]{1,2})( - ([0-9]{1,2}))?( \((.*)\))?")

def parse_subject(soup):
    """
    Extract additional metadata from the missed connection's subject
    """
    # extract the subject
    raw_subject = soup.find("h2", {'class': 'postingtitle'}).text.strip()
    
    # apply our regular expression
    m = re_subject.search(raw_subject)
github abelsonlive / scrape-the-gibson / 03-multithreading.py
import requests
from bs4 import BeautifulSoup
from pprint import pprint
from urlparse import urljoin
from thready import threaded
import dataset
import os
from hashlib import sha1

# A list of missed connections in New York
BASE_URL = 'http://newyork.craigslist.org/'

# connect to our database
db = dataset.connect('sqlite:///missed_connections.db')

# a directory for caching files we've already downloaded
CACHE_DIR = os.path.join(os.path.dirname(__file__), 'cache')

def url_to_filename(url):
    """ Make a URL into a file name, using SHA1 hashes. """

    # use a sha1 hash to convert the url into a unique filename
    hash_file = sha1(url).hexdigest() + '.html'
    return os.path.join(CACHE_DIR, hash_file)


def store_local(url, content):
    """ Save a local copy of the file. """

    # If the cache directory does not exist, make one.
github karan / slashRemindMe / streamer / streamer.py
# Do not respond to queries by these accounts
BLACKLIST = [
    'pixelsorter',
    'Lowpolybot',
    'slashKareBear',
    'slashgif'
]


logging.basicConfig(filename='logger.log',
                    level=logging.INFO,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Connect to the db
db = dataset.connect(DATABASE_URL)
print('DATABASE_URL=%s' % DATABASE_URL)
table = db['reminders']

# Twitter client
auth = tweepy.OAuthHandler(TWITTER_KEY, TWITTER_SECRET)
auth.set_access_token(TWITTER_TOKEN, TWITTER_TOKEN_SECRET)
api = tweepy.API(auth)

# backoff time
backoff = BACKOFF

# parsedatetime object
cal = pdt.Calendar()


def now():
github LukeB42 / Emissary / Emissary.py
parser.add_option("-i", "--interactive", dest="interactive", action="store_true", help="Launch interactive shell.")
	parser.add_option("--logfile", dest="logfile", action="store", default='emissary.log', help="(defaults to ./emissary.log)")
	parser.add_option("--pidfile", dest="pidfile", action="store", default='emissary.pid', help="(defaults to ./emissary.pid)")
	parser.add_option("--run-as", dest="run_as",action="store", default=None, help="(defaults to the invoking user)")
	parser.add_option("--driver", dest="driver", action="store", default='sqlite', help="(defaults to sqlite)")
	parser.add_option("--db", dest="db", action="store", default='cache.db', help="(defaults to ./cache.db)")
	parser.add_option("-a", "--address", dest="address", action="store", default='127.0.0.1', help="(defaults to 127.0.0.1)")
	parser.add_option("-p", "--port", dest="port", action="store", default='6362', help="(defaults to 6362)")
	(options, args) = parser.parse_args()

# handle rc.d
	if options.stop or options.restart:
		halt(options.pidfile)

# init db
	db = dataset.connect(options.driver + ':///' + options.db)

# init logging
	log = Log.Log(__file__,log_file=options.logfile,log_stdout=options.foreground) # Logging to db possible but causes contentions.
	log.debug = options.debug
	log('Emissary %s started.' % VERSION)

	if (pwd.getpwuid(os.getuid())[2] == 0) and (options.run_as == None):
		log("Running as root is not permitted here.",'warning')
		log("Use the --run-as option to drop privileges.",'warning')
		raise SystemExit

	config = Config.Config(db,log)
	# if options.config: load configuration from json file.
	if (not 'version' in config.config.keys()) or (VERSION != config['version']):
		config.safe = False
	config['version'] 		= VERSION