Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def iterjoin(left, right, lkey, rkey, leftouter=False, rightouter=False,
missing=None, lprefix=None, rprefix=None):
# Set-up portion of a join between the `left` and `right` tables.
# NOTE(review): this fragment is truncated in this view (it stops part-way
# through building the output field names), so only the visible set-up is
# described. `leftouter`, `rightouter`, `missing` and `rprefix` are not used
# in the visible portion.
# `asindices`, `comparable_itemgetter` and `rowgetter` are helpers defined
# elsewhere; their exact semantics should be confirmed against their
# definitions.
lit = iter(left)
rit = iter(right)
# the first row of each table is taken as its field names
lflds = next(lit)
rflds = next(rit)
# determine indices of the key fields in left and right tables
lkind = asindices(lflds, lkey)
rkind = asindices(rflds, rkey)
# construct functions to extract key values from both tables
lgetk = comparable_itemgetter(*lkind)
rgetk = comparable_itemgetter(*rkind)
# determine indices of non-key fields in the right table
# (in the output, we only include key fields from the left table - we
# don't want to duplicate fields)
rvind = [i for i in range(len(rflds)) if i not in rkind]
rgetv = rowgetter(*rvind)
# determine the output fields
# without a left prefix the left field names are copied unchanged; otherwise
# each name is built as str(lprefix) + str(field)
if lprefix is None:
outflds = list(lflds)
else:
outflds = [(str(lprefix) + str(f))
def itercut(source, spec, missing=None):
    """Yield the rows of `source` reduced to the fields selected by `spec`.

    The first row of `source` is treated as the field names. The field
    selection is converted to positional indices with `asindices` and rows
    are projected with a getter built by `rowgetter` (both defined
    elsewhere in this module).

    :param source: iterable of rows; the first row holds the field names
    :param spec: field selection; copied into a tuple up front so the
        caller cannot change it while iteration is in progress
    :param missing: value substituted for any selected field that a short
        row does not have
    :returns: a generator yielding the projected header, then each
        projected data row

    An empty `source` (no header row) produces no output. Without the
    explicit guard, the ``StopIteration`` from ``next()`` would surface as
    ``RuntimeError`` inside a generator on Python 3.7+ (PEP 479).
    """
    it = iter(source)
    spec = tuple(spec)  # make sure no-one can change midstream

    try:
        flds = next(it)
    except StopIteration:
        # no header row at all: nothing to cut, finish quietly
        return

    # convert field selection into field indices
    indices = asindices(flds, spec)

    # function that projects a row onto the selected indices
    transform = rowgetter(*indices)

    # yield the transformed field names
    yield transform(flds)

    # construct the transformed data
    for row in it:
        try:
            projected = transform(row)
        except IndexError:
            # row is short, let's be kind and fill in any missing fields
            projected = tuple(row[i] if i < len(row) else missing
                              for i in indices)
        # yield outside the try so only the projection itself is guarded
        yield projected
def itercutout(source, spec, missing=None):
    """Yield the rows of `source` with the fields selected by `spec` removed.

    The first row of `source` is treated as the field names. The selection
    is converted to positional indices with `asindices`; every other
    position is kept, in its original order, using a getter built by
    `rowgetter` (both helpers are defined elsewhere in this module).

    :param source: iterable of rows; the first row holds the field names
    :param spec: selection of fields to drop; copied into a tuple up front
        so the caller cannot change it while iteration is in progress
    :param missing: value substituted for any retained field that a short
        row does not have
    :returns: a generator yielding the reduced header, then each reduced
        data row

    An empty `source` (no header row) produces no output. Without the
    explicit guard, the ``StopIteration`` from ``next()`` would surface as
    ``RuntimeError`` inside a generator on Python 3.7+ (PEP 479).
    """
    it = iter(source)
    spec = tuple(spec)  # make sure no-one can change midstream

    try:
        flds = next(it)
    except StopIteration:
        # no header row at all: nothing to cut, finish quietly
        return

    # convert field selection into field indices, then keep the complement
    indicesout = asindices(flds, spec)
    indices = [i for i in range(len(flds)) if i not in indicesout]

    # function that projects a row onto the retained indices
    transform = rowgetter(*indices)

    # yield the transformed field names
    yield transform(flds)

    # construct the transformed data
    for row in it:
        try:
            projected = transform(row)
        except IndexError:
            # row is short, let's be kind and fill in any missing fields
            projected = tuple(row[i] if i < len(row) else missing
                              for i in indices)
        # yield outside the try so only the projection itself is guarded
        yield projected
def iterduplicates(source, key):
# Walks `source` comparing the key of each row with the key of the row
# before it.
# NOTE(review): this fragment is truncated in this view (the body of the
# key-equality branch and the handler for the open `try` are not visible),
# so what is yielded for matching rows cannot be documented from here.
# assume source is already sorted by `key`; no sorting is performed in the
# visible code
it = iter(source)
try:
# NOTE(review): `it.next()` is the Python 2 iterator protocol; other
# functions in this file use the builtin `next(it)` - confirm which
# interpreter versions must be supported.
flds = it.next()
# the header row is passed through unchanged
yield flds
# convert field selection into field indices
indices = asindices(flds, key)
# now use field indices to construct a getkey function
# N.B., this may raise an exception on short rows, depending on
# the field selection
getkey = itemgetter(*indices)
# `previous` holds the last row seen; None until the first data row arrives
previous = None
# NOTE(review): `previous_yielded` is presumably used to avoid emitting the
# same row twice - its use is outside the visible portion.
previous_yielded = False
for row in it:
if previous is None:
previous = row
else:
kprev = getkey(previous)
kcurr = getkey(row)
if kprev == kcurr:
def _iternocache(self, source, key, reverse):
# Begins a sorted iteration over `source` without using the cache: logs a
# debug message, clears the cache, yields the header, then reads and sorts
# a first chunk of rows.
# NOTE(review): this fragment is truncated in this view - what happens
# after the first chunk is sorted is not visible here.
debug('iterate without cache')
self._clearcache()
it = iter(source)
# the first row is taken as the field names and passed through as a tuple
flds = next(it)
yield tuple(flds)
if key is not None:
# convert field selection into field indices
indices = asindices(flds, key)
else:
# no key given: every field position takes part in the sort key
indices = range(len(flds))
# now use field indices to construct a _getkey function
# N.B., this will probably raise an exception on short rows
getkey = comparable_itemgetter(*indices)
# TODO support native comparison
# initialise the first chunk
# (at most self.buffersize rows are pulled from the iterator)
rows = list(itertools.islice(it, 0, self.buffersize))
# print(repr(getkey))
# print(rows)
# for row in rows:
# print(row, getkey(row))
# sort the chunk in place by key, honouring the requested direction
rows.sort(key=getkey, reverse=reverse)
def __iter__(self):
    """Iterate over ``self.table`` with ``self.field`` moved to ``self.index``.

    The first row of the table is treated as the field names. The new
    header is the original header with ``self.field`` removed and then
    re-inserted at position ``self.index``; data rows are rearranged to
    match, using `asindices` and `rowgetter` (defined elsewhere in this
    module).

    Rows shorter than the header are padded with ``self.missing``.

    An empty table (no header row) produces no output. Without the explicit
    guard, the ``StopIteration`` from ``next()`` would surface as
    ``RuntimeError`` inside a generator on Python 3.7+ (PEP 479).
    """
    it = iter(self.table)

    try:
        header = next(it)
    except StopIteration:
        # no header row at all: nothing to rearrange, finish quietly
        return

    # determine output fields
    fields = list(header)
    newfields = [f for f in fields if f != self.field]
    newfields.insert(self.index, self.field)
    yield tuple(newfields)

    # function that rearranges a row into the new field order
    indices = asindices(fields, newfields)
    transform = rowgetter(*indices)

    # construct the transformed data
    for row in it:
        try:
            rearranged = transform(row)
        except IndexError:
            # row is short, let's be kind and fill in any missing fields
            rearranged = tuple(row[i] if i < len(row) else self.missing
                               for i in indices)
        # yield outside the try so only the rearrangement itself is guarded
        yield rearranged
def iterantijoin(left, right, key):
# Set-up portion of an anti-join between `left` and `right` on `key`: the
# left header is yielded, then rows of both tables are grouped by key value.
# NOTE(review): this fragment is truncated in this view (the main loop and
# the handlers for both open `try` blocks are not visible), so which rows
# are emitted cannot be documented from here.
lit = iter(left)
rit = iter(right)
try:
# NOTE(review): `.next()` is the Python 2 iterator protocol; other
# functions in this file use the builtin `next(...)` - confirm which
# interpreter versions must be supported.
lflds = lit.next()
rflds = rit.next()
# the output header is the left table's header, unchanged
yield lflds
# determine indices of the key fields in left and right tables
lkind = asindices(lflds, key)
rkind = asindices(rflds, key)
# construct functions to extract key values from both tables
lgetk = itemgetter(*lkind)
rgetk = itemgetter(*rkind)
# construct group iterators for both tables
# NOTE(review): if `groupby` is itertools.groupby it only groups adjacent
# rows, so both inputs are presumably expected to be sorted by key - confirm.
lgit = groupby(lit, key=lgetk)
rgit = groupby(rit, key=rgetk)
# loop until *either* of the iterators is exhausted
try:
# pick off initial row groups
lkval, lrowgrp = lgit.next()
rkval, rrowgrp = rgit.next()
def iterhashantijoin(left, right, lkey, rkey):
    """Yield the rows of `left` whose key does not occur in `right`.

    The first row of each table is treated as its field names. All key
    values from `right` are collected into a set first, then `left` is
    streamed and each row is emitted only if its key is not in that set.

    :param left: iterable of rows; the first row holds the field names
    :param right: iterable of rows; the first row holds the field names
    :param lkey: key field selection for `left`, resolved by `asindices`
        (defined elsewhere in this module)
    :param rkey: key field selection for `right`, resolved by `asindices`
    :returns: a generator yielding the left header as a tuple, then each
        unmatched left row as a tuple

    Key values must be hashable because they are stored in a set.

    If either table has no header row the generator produces no output.
    Without the explicit guard, the ``StopIteration`` from ``next()`` would
    surface as ``RuntimeError`` inside a generator on Python 3.7+ (PEP 479).
    """
    lit = iter(left)
    rit = iter(right)

    try:
        lflds = next(lit)
        rflds = next(rit)
    except StopIteration:
        # one of the tables is completely empty: finish quietly
        return

    yield tuple(lflds)

    # determine indices of the key fields in left and right tables
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)

    # construct functions to extract key values from both tables
    lgetk = operator.itemgetter(*lkind)
    rgetk = operator.itemgetter(*rkind)

    # every key present in the right table, for constant-time membership tests
    rkeys = {rgetk(rrow) for rrow in rit}

    for lrow in lit:
        if lgetk(lrow) not in rkeys:
            yield tuple(lrow)
def __init__(self, default_connections, keyed_connections, fields,
             discriminator):
    """Initialise the connection and store its discriminator.

    `default_connections`, `keyed_connections` and `fields` are passed
    straight to the parent initialiser. `discriminator` is stored as-is
    when it is callable; otherwise it is treated as a field selection and
    replaced by an ``itemgetter`` over the indices that `asindices`
    resolves against `fields`.
    """
    super(PartitionConnection, self).__init__(
        default_connections, keyed_connections, fields)

    if not callable(discriminator):
        # assume field or fields
        discriminator = itemgetter(*asindices(fields, discriminator))
    self.discriminator = discriminator