How to use the petl.util.asindices function in petl

To help you get started, we’ve selected a few petl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github petl-developers / petl / src / petl / transform / joins.py View on Github external
def iterjoin(left, right, lkey, rkey, leftouter=False, rightouter=False,
             missing=None, lprefix=None, rprefix=None):
    lit = iter(left)
    rit = iter(right)

    lflds = next(lit)
    rflds = next(rit)

    # determine indices of the key fields in left and right tables
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)

    # construct functions to extract key values from both tables
    lgetk = comparable_itemgetter(*lkind)
    rgetk = comparable_itemgetter(*rkind)

    # determine indices of non-key fields in the right table
    # (in the output, we only include key fields from the left table - we
    # don't want to duplicate fields)
    rvind = [i for i in range(len(rflds)) if i not in rkind]
    rgetv = rowgetter(*rvind)

    # determine the output fields
    if lprefix is None:
        outflds = list(lflds)
    else:
        outflds = [(str(lprefix) + str(f))
github petl-developers / petl / src / petl / transform / basics.py View on Github external
def itercut(source, spec, missing=None):
    """Yield the rows of `source` restricted to the fields selected by `spec`.

    The first row of `source` is treated as the header. The header is yielded
    first, passed through the same field selection as the data rows, followed
    by every data row.

    :param source: iterable of rows; the first row is the header
    :param spec: field selection, resolved against the header by
        ``asindices``
    :param missing: value substituted for selected positions that a short
        row does not have
    :returns: generator over the transformed rows; yields nothing at all if
        `source` has no header row
    """
    it = iter(source)
    spec = tuple(spec)  # make sure no-one can change midstream

    # Fetch the header. An exhausted source must end this generator cleanly:
    # letting StopIteration escape from a generator body is converted into
    # RuntimeError under PEP 479 (Python 3.7+).
    try:
        flds = next(it)
    except StopIteration:
        return

    # convert field selection into field indices
    indices = asindices(flds, spec)

    # define a function to transform each row in the source data
    # according to the field selection
    transform = rowgetter(*indices)

    # yield the transformed field names
    yield transform(flds)

    # construct the transformed data
    for row in it:
        try:
            yield transform(row)
        except IndexError:
            # row is short, let's be kind and fill in any missing fields
            yield tuple(row[i] if i < len(row) else missing for i in indices)
github petl-developers / petl / src / petl / transform / basics.py View on Github external
def itercutout(source, spec, missing=None):
    """Yield the rows of `source` with the fields selected by `spec` removed.

    The first row of `source` is treated as the header. Every header position
    that is *not* among the indices resolved from `spec` is kept, in its
    original order. The reduced header is yielded first, followed by every
    data row reduced in the same way.

    :param source: iterable of rows; the first row is the header
    :param spec: selection of fields to drop, resolved against the header by
        ``asindices``
    :param missing: value substituted for kept positions that a short row
        does not have
    :returns: generator over the transformed rows; yields nothing at all if
        `source` has no header row
    """
    it = iter(source)
    spec = tuple(spec)  # make sure no-one can change midstream

    # Fetch the header. An exhausted source must end this generator cleanly:
    # letting StopIteration escape from a generator body is converted into
    # RuntimeError under PEP 479 (Python 3.7+).
    try:
        flds = next(it)
    except StopIteration:
        return

    # convert field selection into the indices to drop, then keep the rest
    indicesout = asindices(flds, spec)
    indices = [i for i in range(len(flds)) if i not in indicesout]

    # define a function to transform each row in the source data
    # according to the field selection
    transform = rowgetter(*indices)

    # yield the transformed field names
    yield transform(flds)

    # construct the transformed data
    for row in it:
        try:
            yield transform(row)
        except IndexError:
            # row is short, let's be kind and fill in any missing fields
            yield tuple(row[i] if i < len(row) else missing for i in indices)
github petl-developers / petl / src / petl / transform.py View on Github external
def iterduplicates(source, key):
    # assume source is sorted
    # first need to sort the data
    it = iter(source)

    try:
        flds = it.next()
        yield flds

        # convert field selection into field indices
        indices = asindices(flds, key)
            
        # now use field indices to construct a getkey function
        # N.B., this may raise an exception on short rows, depending on
        # the field selection
        getkey = itemgetter(*indices)
        
        previous = None
        previous_yielded = False
        
        for row in it:
            if previous is None:
                previous = row
            else:
                kprev = getkey(previous)
                kcurr = getkey(row)
                if kprev == kcurr:
github petl-developers / petl / src / petl / transform / sorts.py View on Github external
def _iternocache(self, source, key, reverse):
        debug('iterate without cache')
        self._clearcache()
        it = iter(source)

        flds = next(it)
        yield tuple(flds)

        if key is not None:
            # convert field selection into field indices
            indices = asindices(flds, key)
        else:
            indices = range(len(flds))
        # now use field indices to construct a _getkey function
        # N.B., this will probably raise an exception on short rows
        getkey = comparable_itemgetter(*indices)

        # TODO support native comparison

        # initialise the first chunk
        rows = list(itertools.islice(it, 0, self.buffersize))
        # print(repr(getkey))
        # print(rows)
        # for row in rows:
        #     print(row, getkey(row))
        rows.sort(key=getkey, reverse=reverse)
github petl-developers / petl / src / petl / transform / basics.py View on Github external
def __iter__(self):
        """Iterate over ``self.table`` with ``self.field`` repositioned.

        The output header is the input header with every occurrence of
        ``self.field`` removed and ``self.field`` inserted once at position
        ``self.index``. Data rows are rearranged to match, and positions a
        short row does not have are filled with ``self.missing``.

        Yields nothing at all if ``self.table`` has no header row.
        """
        it = iter(self.table)

        # Fetch the header. An exhausted table must end this generator
        # cleanly: letting StopIteration escape from a generator body is
        # converted into RuntimeError under PEP 479 (Python 3.7+).
        try:
            fields = list(next(it))
        except StopIteration:
            return

        # determine output fields
        newfields = [f for f in fields if f != self.field]
        newfields.insert(self.index, self.field)
        yield tuple(newfields)

        # define a function to transform each row in the source data
        # according to the field selection
        indices = asindices(fields, newfields)
        transform = rowgetter(*indices)

        # construct the transformed data
        for row in it:
            try:
                yield transform(row)
            except IndexError:
                # row is short, let's be kind and fill in any missing fields
                yield tuple(row[i] if i < len(row) else self.missing
                            for i in indices)
github petl-developers / petl / src / petl / transform.py View on Github external
def iterantijoin(left, right, key):
    lit = iter(left)
    rit = iter(right)
    try:
        lflds = lit.next()
        rflds = rit.next()
        yield lflds

        # determine indices of the key fields in left and right tables
        lkind = asindices(lflds, key)
        rkind = asindices(rflds, key)
        
        # construct functions to extract key values from both tables
        lgetk = itemgetter(*lkind)
        rgetk = itemgetter(*rkind)
        
        # construct group iterators for both tables
        lgit = groupby(lit, key=lgetk)
        rgit = groupby(rit, key=rgetk)
        
        # loop until *either* of the iterators is exhausted
        try:

            # pick off initial row groups
            lkval, lrowgrp = lgit.next() 
            rkval, rrowgrp = rgit.next()
github petl-developers / petl / src / petl / transform / hashjoins.py View on Github external
def iterhashantijoin(left, right, lkey, rkey):
    """Yield the rows of `left` whose key does not occur in `right`.

    The first row of each table is treated as its header. The left header is
    yielded first (as a tuple). All key values of `right` are then collected
    into a set, so `right` is consumed completely before any left data row is
    produced and its key values must be hashable.

    :param left: iterable of rows; the first row is the header
    :param right: iterable of rows; the first row is the header
    :param lkey: key field selection for `left`, resolved by ``asindices``
    :param rkey: key field selection for `right`, resolved by ``asindices``
    :returns: generator over tuples; yields nothing at all if either table
        has no header row
    """
    lit = iter(left)
    rit = iter(right)

    # Fetch both headers. An exhausted table must end this generator cleanly:
    # letting StopIteration escape from a generator body is converted into
    # RuntimeError under PEP 479 (Python 3.7+).
    try:
        lflds = next(lit)
        rflds = next(rit)
    except StopIteration:
        return
    yield tuple(lflds)

    # determine indices of the key fields in left and right tables
    lkind = asindices(lflds, lkey)
    rkind = asindices(rflds, rkey)

    # construct functions to extract key values from both tables
    lgetk = operator.itemgetter(*lkind)
    rgetk = operator.itemgetter(*rkind)

    # collect every key present in the right table for O(1) membership tests
    rkeys = set(rgetk(rrow) for rrow in rit)

    # keep only the left rows whose key is absent from the right table
    for lrow in lit:
        if lgetk(lrow) not in rkeys:
            yield tuple(lrow)
github petl-developers / petl / src / petl / push.py View on Github external
def __init__(self, default_connections, keyed_connections, fields,
                 discriminator):
        """Set up the connection and store a callable discriminator.

        `default_connections`, `keyed_connections` and `fields` are passed
        straight to the parent initialiser. `discriminator` is stored as-is
        when it is callable; otherwise it is taken to be a field selection
        and converted into an ``itemgetter`` over the indices that
        ``asindices`` resolves from `fields`.
        """
        super(PartitionConnection, self).__init__(
            default_connections, keyed_connections, fields
        )
        if not callable(discriminator):
            # assume field or fields
            key_indices = asindices(fields, discriminator)
            discriminator = itemgetter(*key_indices)
        self.discriminator = discriminator