Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_filtercols_limit(self):
    '''?_c=District&_limit=200 returns 200 unique values (_limit defaults to 100).'''
    columns = ['District']
    max_values = 200
    query_args = {'_c': columns, '_limit': [max_values]}
    result = gramex.data.filtercols(args=query_args, **self.urls['census'])
    # filtercols() must return exactly the requested columns as keys
    self.check_keys(result, columns)
    # Each column maps to its first `max_values` unique values
    for column, frame in result.items():
        eqframe(frame, self.unique_of(self.census, column).head(max_values))
def test_dir(self):
    '''The /formhandler/dir endpoint mirrors gramex.data.dirstat, including _sort.'''
    def assert_matches(expected, **params):
        # Fetch the endpoint and compare frames, ignoring the index values
        response = self.get('/formhandler/dir', params=params)
        actual = pd.DataFrame(response.json())
        expected.index = actual.index
        afe(actual, expected, check_like=True)

    for root in ('dir/subdir', 'dir/', 'subapp'):
        stats = gramex.data.dirstat(os.path.join(folder, root))
        # Unsorted, ascending sort on size, and descending sort on name
        assert_matches(stats, root=root)
        assert_matches(stats.sort_values('size'), root=root, _sort='size')
        assert_matches(stats.sort_values('name', ascending=False), root=root, _sort='-name')
def check_insert_db(self, url, dbname):
    '''Insert rows into a DB table; verify contents, primary key, and duplicate-key failure.'''
    self.db.add(dbname)
    rows = self.insert_rows.copy()
    rows['index'] = [1, 2]  # serves as the primary key column
    inserted_count = gramex.data.insert(url, args=rows, table='test_insert', id='index')
    eq_(inserted_count, 2)
    # The table contents must match what was inserted
    actual = gramex.data.filter(url, table='test_insert')
    expected = pd.DataFrame(rows)
    # Normalise the sales column to float on both sides before comparing
    actual['sales'] = actual['sales'].astype(float)
    expected['sales'] = expected['sales'].astype(float)
    afe(actual, expected, check_like=True)
    # insert() should have created a primary key on the id column
    inspector = sa.inspect(sa.create_engine(url))
    ok_('index' in inspector.get_pk_constraint('test_insert')['constrained_columns'])
    # Re-inserting the same keys must violate that primary key constraint
    with assert_raises(sa.exc.IntegrityError):
        gramex.data.insert(url, args=rows, table='test_insert', id='index')
def consolidate():
'''Consolidate log data into a database'''
# NOTE(review): the source is whitespace-mangled and this definition is
# truncated mid-way through merge()'s parsing loop. Code left byte-identical;
# only comments added.
# LOGFILE is read here but not used in the visible portion -- presumably used
# below the truncation. TODO confirm.
log_file = variables['LOGFILE']
data_file = variables['LOGDB']
# Connect to DB and initialize
# Logs live in a single SQLite table; CREATE TABLE IF NOT EXISTS keeps this
# idempotent across runs.
data_url = 'sqlite:///' + data_file
engine = sa.create_engine(data_url)
engine.execute('''CREATE TABLE IF NOT EXISTS logs (
src TEXT, time INT, event TEXT, ip TEXT,
system TEXT, node TEXT, release TEXT, version TEXT, machine TEXT, processor TEXT,
pid NUM, args TEXT, cwd TEXT, dir TEXT,
date TEXT
)''')
# 'src' records which log file each row came from; already-merged files are
# skipped unless force=True.
merged = set(gramex.data.filter(url=data_url, query='SELECT DISTINCT src FROM logs')['src'])
def merge(path, force=False):
'''Merge log file from path into database'''
# src is the file-name portion of the path, used as the merge key
src = os.path.split(path)[-1]
if src in merged and not force:
return
app_log.info('consolidating %s', src)
result = []
# Each line of the log file is a standalone JSON record
for line in io.open(path, 'r', encoding='utf-8'):
row = json.loads(line)
row['src'] = src
# uname is a list. Convert into system data
(row['system'], row['node'], row['release'], row['version'],
row['machine'], row['processor']) = row.pop('uname')
# NOTE(review): this fragment starts mid-function -- the enclosing def and the
# cache-validation branch that leads to this raise are outside the visible
# source, and no return statement is visible. Code left byte-identical.
raise ValueError('cache= must be a FormHandler dict config, not %r' % cache)
# Store data in cache with fixed columns: source, target, q, t
result = pd.DataFrame(columns=['source', 'target', 'q', 't'])
if not q:
return result
# Keep the caller's ordering so results can be re-sorted after cache merge
original_q = q
# Fetch from cache, if any
if cache:
try:
args = {'q': q, 'target': [target] * len(q)}
if source:
args['source'] = [source] * len(q)
with _translate_cache_lock:
result = gramex.data.filter(args=args, **cache)
except Exception:
# Best-effort: a broken cache must not block translation itself
app_log.exception('Cannot query %r in translate cache: %r', args, dict(cache))
# Remove already cached results from q
q = [v for v in q if v not in set(result.get('q', []))]
# Only the cache misses are sent to the external translate API
if len(q):
new_data = translate_api[api](q, source, target, key)
if new_data is not None:
# NOTE(review): DataFrame.append was removed in pandas 2.0; consider
# pd.concat([result, pd.DataFrame(new_data)]) when upgrading.
result = result.append(pd.DataFrame(new_data), sort=False)
if cache:
with _translate_cache_lock:
gramex.data.insert(id=['source', 'target', 'q'], args=new_data, **cache)
# Sort results by q
result['order'] = result['q'].map(original_q.index)
result.sort_values('order', inplace=True)
def put(self, *path_args, **path_kwargs):
    '''
    Create or update a model via PUT.

    A request to /model/name/ with no params creates a blank model; with args,
    the args are interpreted as model parameters. Set Model-Retrain: true in
    the headers to train a model from scratch or extend it (to extend a
    trained model, send Model-Retrain without updating the parameters).
    A request to /model/name/data with args updates the training data instead
    (currently broken for DataFrames due to a gramex.data bug).
    '''
    # Load the existing pickled model; if none exists yet, build a fresh
    # classifier from the request body.
    try:
        model = gramex.cache.open(self.pickle_file_path, gramex.ml.load)
    except EnvironmentError:  # noqa
        model = gramex.ml.Classifier(**self.request_body)
    if not self.get_data_flag():
        # Model path: NOTE(review) the model is saved only when _train()
        # returns falsy -- confirm this inversion is intentional.
        if not self._train(model):
            model.save(self.pickle_file_path)
    else:
        # Data path: update the training data store with the posted columns
        file_kwargs = self.listify(model.input + [model.output] + ['id'])
        gramex.data.update(model.url, args=file_kwargs, id=file_kwargs['id'])
def prepare_where(query, args, columns):
'''prepare where clause'''
# NOTE(review): this definition is truncated -- the '~' branch builds q but
# never appends it, and no return statement is visible. Code left
# byte-identical; only comments added.
# NOTE(review): values are interpolated into SQL via string formatting; a
# value containing a double quote can escape the literal. Prefer bound
# parameters if args can carry untrusted input.
wheres = []
for key, vals in args.items():
# _filter_col splits a key like 'col>~' into (column, aggregation, operator)
col, agg, op = gramex.data._filter_col(key, columns)
# Silently skip filters on unknown columns
if col not in columns:
continue
if op == '':
wheres.append('"{}" IN ("{}")'.format(col, '", "'.join(vals)))
elif op == '!':
wheres.append('"{}" NOT IN ("{}")'.format(col, '", "'.join(vals)))
elif op == '>':
wheres.append('"{}" > "{}"'.format(col, min(vals)))
elif op == '>~':
wheres.append('"{}" >= "{}"'.format(col, min(vals)))
elif op == '<':
wheres.append('"{}" < "{}"'.format(col, max(vals)))
elif op == '<~':
wheres.append('"{}" <= "{}"'.format(col, max(vals)))
elif op == '~':
# Substring match: any of the values may match (OR)
q = ' OR '.join('"{}" LIKE "%{}%"'.format(col, x) for x in vals)
# NOTE(review): fragment starts mid-function -- `value`, `error`, `stderr` and
# `handler` are defined above the visible source. Code left byte-identical.
# Record the command's stderr as the error when it produced no output
if not value[key]:
error[key] = stderr.strip()
# Add interpreter and Gramex install details to the report
value['python', 'version'] = '{0}.{1}.{2}'.format(*sys.version_info[:3])
value['python', 'path'] = sys.executable
value['gramex', 'version'] = gramex.__version__
value['gramex', 'path'] = os.path.dirname(gramex.__file__)
import pandas as pd
# Tuple keys become a two-level index; reset_index turns them into columns
df = pd.DataFrame({'value': value, 'error': error}).reset_index()
df.columns = ['section', 'key'] + df.columns[2:].tolist()
df = df[['section', 'key', 'value', 'error']].sort_values(['section', 'key'])
df['error'] = df['error'].fillna('')
# Apply the handler's URL args as FormHandler-style filters
data = gramex.data.filter(df, handler.args)
# TODO: handle _format, _meta, _download, etc just like FormHandler
raise Return(gramex.data.download(data))
# NOTE(review): fragment starts mid-method (presumably TwitterStream.__init__
# -- TODO confirm) and is truncated inside the while loop. Code left
# byte-identical; only comments added.
self.url = 'https://stream.twitter.com/1.1/statuses/filter.json'
self.valid_params = {
'follow', 'track', 'locations', 'delimited', 'stall_warnings',
'filter_level', 'language'}
self.enabled = True
self.delay = 0
# Set up writers
# Exactly one writer is configured, in priority order: path > function > sqlalchemy
if 'path' in kwargs:
self.stream = StreamWriter(kwargs['path'], flush=kwargs.get('flush', False))
self.process_bytes = self.stream.write
elif 'function' in kwargs:
self.process_json = build_transform(
kwargs, vars={'message': {}}, filename='TwitterStream:function')
elif kwargs.get('driver') == 'sqlalchemy':
engine = gramex.data.create_engine(kwargs['url'], **kwargs.get('parameters', {}))
table = gramex.data.get_table(kwargs['table'])
fields = kwargs['fields']
# Drop configured fields that don't exist as table columns (logged, not fatal)
for field in list(fields.keys()):
if field not in table.columns:
app_log.error('TwitterStream field %s not in table' % field)
fields.pop(field)
flatten = flattener(fields=fields)
self.process_json = lambda tweet: engine.execute(table.insert(flatten(tweet)))
self.buf = bytearray()
self.client = tornado.httpclient.HTTPClient()
while True:
# Set .enabled to False to temporarily disable streamer
if self.enabled:
# Only whitelisted Twitter streaming params are forwarded, utf-8 encoded
params = {key: val.encode('utf-8') for key, val in self.params.items()
if key in self.valid_params}
# NOTE(review): this span duplicates the preceding TwitterStream fragment
# almost verbatim (a concatenation artifact) and is likewise truncated
# mid-condition. Code left byte-identical; only comments added.
self.valid_params = {
'follow', 'track', 'locations', 'delimited', 'stall_warnings',
'filter_level', 'language'}
self.enabled = True
self.delay = 0
# Set up writers
# Exactly one writer is configured, in priority order: path > function > sqlalchemy
if 'path' in kwargs:
self.stream = StreamWriter(kwargs['path'], flush=kwargs.get('flush', False))
self.process_bytes = self.stream.write
elif 'function' in kwargs:
self.process_json = build_transform(
kwargs, vars={'message': {}}, filename='TwitterStream:function')
elif kwargs.get('driver') == 'sqlalchemy':
engine = gramex.data.create_engine(kwargs['url'], **kwargs.get('parameters', {}))
table = gramex.data.get_table(kwargs['table'])
fields = kwargs['fields']
# Drop configured fields that don't exist as table columns (logged, not fatal)
for field in list(fields.keys()):
if field not in table.columns:
app_log.error('TwitterStream field %s not in table' % field)
fields.pop(field)
flatten = flattener(fields=fields)
self.process_json = lambda tweet: engine.execute(table.insert(flatten(tweet)))
self.buf = bytearray()
self.client = tornado.httpclient.HTTPClient()
while True:
# Set .enabled to False to temporarily disable streamer
if self.enabled:
# Only whitelisted Twitter streaming params are forwarded, utf-8 encoded
params = {key: val.encode('utf-8') for key, val in self.params.items()
if key in self.valid_params}
# Twitter requires at least one of follow/track/locations (truncated here)
if 'follow' not in params and 'track' not in params and 'locations' not in params: