How to use the petl.util.RowContainer function in petl

To help you get started, we’ve selected a few petl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github petl-developers / petl / src / petl / transform / regex.py View on Github external
| '2'  | '15'    | 'para'     | '2'   |
        +------+---------+------------+-------+
        | '3'  | '18'    | 'temp'     | '1'   |
        +------+---------+------------+-------+
        | '4'  | '19'    | 'temp'     | '2'   |
        +------+---------+------------+-------+

    See also :func:`re.split`.

    """

    return SplitView(table, field, pattern, newfields, include_original,
                     maxsplit, flags)


class SplitView(RowContainer):
    """Lazy table view that records the arguments for a split operation.

    Constructing the view does no work: the arguments are stored as-is and
    are only handed to :func:`itersplit` when the view is iterated.

    """

    def __init__(self, source, field, pattern, newfields=None,
                 include_original=False, maxsplit=0, flags=0):
        # Keep every argument verbatim; __iter__ forwards them unchanged.
        self.source = source
        self.field = field
        self.pattern = pattern
        self.newfields = newfields
        self.include_original = include_original
        self.maxsplit = maxsplit
        self.flags = flags

    def __iter__(self):
        # Each call builds a new itersplit iterator from the stored
        # arguments, passed positionally in the same order as __init__.
        return itersplit(self.source, self.field, self.pattern, self.newfields,
                         self.include_original, self.maxsplit, self.flags)

github petl-developers / petl / src / petl / transform / reshape.py View on Github external
+------+------+-------+
        | 'B'  | 2    | False |
        +------+------+-------+
        | 'C'  | 9    | None  |
        +------+------+-------+

    See also :func:`flatten`.

    .. versionadded:: 0.7

    """

    return UnflattenView(*args, **kwargs)


class UnflattenView(RowContainer):

    def __init__(self, *args, **kwargs):
        """Store the input, the period and the `missing` fill value.

        Two positional call forms are accepted:

        * two arguments: used directly as the input and the period;
        * three arguments: the first two are passed to :func:`values` to
          obtain the input, and the third is the period.

        The optional `missing` keyword argument defaults to ``None``.

        Raises :class:`AssertionError` if any other number of positional
        arguments is given.

        """
        if len(args) == 2:
            self.input = args[0]
            self.period = args[1]
        elif len(args) == 3:
            self.input = values(args[0], args[1])
            self.period = args[2]
        else:
            # Raise explicitly instead of `assert False`: assert statements
            # are removed under `python -O`, which would let an invalid call
            # through and leave the instance without `input`/`period`. The
            # exception type is kept so existing callers are unaffected.
            raise AssertionError('invalid arguments')
        self.missing = kwargs.get('missing', None)

    def __iter__(self):
        inpt = self.input
        period = self.period
        missing = self.missing
github petl-developers / petl / src / petl / transform / basics.py View on Github external
Note that :func:`cat` can be used with a single table argument, in which
    case it has the effect of ensuring all data rows are the same length as
    the header row, truncating any long rows and padding any short rows with
    the value of the `missing` keyword argument.

    By default, the fields for the output table will be determined as the
    union of all fields found in the input tables. Use the `header` keyword 
    argument to override this behaviour and specify a fixed set of fields for 
    the output table. 
    
    """
    
    return CatView(tables, **kwargs)
    
    
class CatView(RowContainer):
    """Lazy table view that records the arguments for a concatenation.

    The sources, the `missing` value and the optional fixed `header` are
    stored at construction time and passed to :func:`itercat` on iteration.

    """
    
    def __init__(self, sources, missing=None, header=None):
        self.sources = sources
        self.missing = missing
        # A header of None is stored as None; any other header is frozen
        # into a tuple first.
        if header is not None:
            header = tuple(header)  # ensure hashable
        self.header = header

    def __iter__(self):
        # Delegate to itercat with the stored arguments.
        return itercat(self.sources, self.missing, self.header)
    

def itercat(sources, missing, header):
    its = [iter(t) for t in sources]
    source_flds_lists = [next(it) for it in its]
github petl-developers / petl / src / petl / transform / hashjoins.py View on Github external
and the left table is large.
    
    .. versionadded:: 0.5

    .. versionchanged:: 0.24

    Added support for left and right tables with different key fields via the
    `lkey` and `rkey` arguments.

    """
    
    lkey, rkey = keys_from_args(left, right, key, lkey, rkey)
    return HashAntiJoinView(left, right, lkey, rkey)


class HashAntiJoinView(RowContainer):
    """Lazy table view that records the arguments for a hash anti-join.

    Stores the left and right tables together with their respective key
    specifications; iteration is delegated to :func:`iterhashantijoin`.

    """
    
    def __init__(self, left, right, lkey, rkey):
        # The keys are stored exactly as given; in the visible caller they
        # come from keys_from_args().
        self.left = left
        self.right = right
        self.lkey = lkey
        self.rkey = rkey

    def __iter__(self):
        # Each call builds a new iterator from the stored arguments.
        return iterhashantijoin(self.left, self.right, self.lkey, self.rkey)
    
    
def iterhashantijoin(left, right, lkey, rkey):
    lit = iter(left)
    rit = iter(right)

    lflds = next(lit)
github petl-developers / petl / src / petl / io / html.py View on Github external
f.detach()


def teeuhtml(table, source=None, caption=None,
             encoding='utf-8', representation=text_type, lineterminator='\r\n'):
    """Return a table that writes rows to a Unicode HTML file as they are
    iterated over.

    All arguments are passed through unchanged to
    :class:`TeeUHTMLContainer`, whose constructor only stores them.

    """

    # Forward everything by keyword so the mapping to the container's
    # parameters is explicit.
    return TeeUHTMLContainer(table, source=source, caption=caption,
                             encoding=encoding, representation=representation,
                             lineterminator=lineterminator)


class TeeUHTMLContainer(RowContainer):

    def __init__(self, table, source=None, caption=None,
                 encoding='utf-8', representation=text_type,
                 lineterminator='\r\n'):
        """Record the table and the output settings without doing any I/O.

        The defaults mirror those of :func:`teeuhtml`. The stored values are
        read back when the container is iterated.

        """
        self.table = table
        self.source = source
        self.caption = caption
        self.encoding = encoding
        self.representation = representation
        self.lineterminator = lineterminator

    def __iter__(self):
        source = write_source_from_arg(self.source)
        lineterminator = self.lineterminator
        caption = self.caption
        representation = self.representation
github petl-developers / petl / src / petl / transform / joins.py View on Github external
| 1    | 'blue'   | 3    | 'square' |
        +------+----------+------+----------+
        | 2    | 'red'    | 1    | 'circle' |
        +------+----------+------+----------+
        | 2    | 'red'    | 3    | 'square' |
        +------+----------+------+----------+

    See also :func:`join`, :func:`leftjoin`, :func:`rightjoin`,
    :func:`outerjoin`.

    """

    return CrossJoinView(*tables, **kwargs)


class CrossJoinView(RowContainer):
    """Lazy table view that records the arguments for a cross join.

    Any number of source tables may be given positionally. The only keyword
    argument read is `prefix`, which defaults to ``False``; other keyword
    arguments are ignored.

    """

    def __init__(self, *sources, **kwargs):
        # `sources` is kept as the tuple of positional arguments.
        self.sources = sources
        self.prefix = kwargs.get('prefix', False)

    def __iter__(self):
        # Delegate to itercrossjoin with the stored arguments.
        return itercrossjoin(self.sources, self.prefix)


def itercrossjoin(sources, prefix):

    # construct fields
    outflds = list()
    for i, s in enumerate(sources):
        if prefix:
            # use one-based numbering
github petl-developers / petl / src / petl / transform / maps.py View on Github external
def rowgroupmap(table, key, mapper, fields=None, presorted=False,
                buffersize=None, tempdir=None, cache=True):
    """
    Group rows under the given key then apply `mapper` to yield zero or more
    output rows for each input group of rows.

    All arguments are forwarded to :class:`RowGroupMapView`. Unless
    `presorted` is true, that view sorts the table by `key` first, passing
    `buffersize`, `tempdir` and `cache` to the sort.

    .. versionadded:: 0.12

    """

    return RowGroupMapView(table, key, mapper, fields=fields,
                           presorted=presorted,
                           buffersize=buffersize, tempdir=tempdir, cache=cache)


class RowGroupMapView(RowContainer):
    """Lazy table view that records the arguments for a row-group mapping.

    The source is wrapped in a sort on `key` at construction time unless the
    caller declares it `presorted`. Iteration is delegated to
    :func:`iterrowgroupmap`.

    """

    def __init__(self, source, key, mapper, fields=None,
                 presorted=False, buffersize=None, tempdir=None, cache=True):
        if presorted:
            # Caller states the rows are already ordered by `key`; use as-is.
            self.source = source
        else:
            # buffersize, tempdir and cache are only used here, as options
            # to the sort; they are not stored on the instance.
            self.source = sort(source, key, buffersize=buffersize,
                               tempdir=tempdir, cache=cache)
        self.key = key
        self.fields = fields
        self.mapper = mapper

    def __iter__(self):
        # Note the argument order: mapper comes before fields.
        return iterrowgroupmap(self.source, self.key, self.mapper, self.fields)
github petl-developers / petl / src / petl / io.py View on Github external
| u'b'  | 2     |
        +-------+-------+
        | u'c'  | 2.0   |
        +-------+-------+

    .. versionchanged:: 0.10.2
    
    Either a database file name or a connection object can be given as the
    first argument. 
    
    """
    
    return Sqlite3View(source, query, *args, **kwargs)


class Sqlite3View(RowContainer):

    def __init__(self, source, query, *args, **kwargs):
        """Record the query and resolve `source` into a sqlite3 connection.

        `source` may be either a database file name (a string), in which case
        a new connection is opened with ``sqlite3.Row`` as its row factory,
        or an existing ``sqlite3.Connection``, which is used as given (its
        row factory is left untouched).

        `query` and any extra positional/keyword arguments are stored
        unchanged on the instance.

        Raises :class:`TypeError` if `source` is neither a string nor a
        connection.

        """
        self.source = source
        self.query = query
        self.args = args
        self.kwargs = kwargs
        # setup the connection
        if isinstance(self.source, basestring):
            self.connection = sqlite3.connect(self.source)
            self.connection.row_factory = sqlite3.Row
        elif isinstance(self.source, sqlite3.Connection):
            self.connection = self.source
        else:
            # Wrap the value in a 1-tuple: with a bare `% self.source`, a
            # tuple argument would be unpacked as the format arguments and
            # the intended message would be replaced by a formatting error.
            # TypeError is a subclass of Exception, so callers that caught
            # the previous generic Exception keep working.
            raise TypeError('source argument must be filename or connection; '
                            'found %r' % (self.source,))
        
    def __iter__(self):
github petl-developers / petl / src / petl / transform / headers.py View on Github external
+----------+----------+
        | 'foofoo' | 'barbar' |
        +==========+==========+
        | 'a'      | 1        |
        +----------+----------+
        | 'b'      | 2        |
        +----------+----------+

    See also :func:`extendheader`, :func:`pushheader`.

    """

    return SetHeaderView(table, fields)


class SetHeaderView(RowContainer):
    """Lazy table view that records a source table and replacement fields.

    Iteration is delegated to :func:`itersetheader`.

    """

    def __init__(self, source, fields):
        self.source = source
        self.fields = fields

    def __iter__(self):
        # Each call builds a new iterator over the source.
        return itersetheader(self.source, self.fields)


def itersetheader(source, fields):
    """Yield `fields` as the header row, followed by the source's data rows.

    The first row of `source` (its existing header) is discarded and
    replaced by ``tuple(fields)``. Every remaining row is yielded as a tuple.

    If `source` is completely empty (it has no header row at all), the new
    header is still yielded and no data rows follow.

    """
    it = iter(source)
    # Discard the source header, tolerating an empty source. A bare next(it)
    # would leak StopIteration out of this generator, which silently yields
    # nothing on older Pythons and raises RuntimeError under PEP 479.
    next(it, None)
    yield tuple(fields)
    for row in it:
        yield tuple(row)
github petl-developers / petl / petl / util.py View on Github external
| 0.4740252948218193  | 364   | 'vanilla'    |
        +---------------------+-------+--------------+
        | 0.166428545780258   | 59    | 'vanilla'    |
        +---------------------+-------+--------------+
        
    .. versionchanged:: 0.6
    
    Now supports different field types, e.g., non-numeric. Previous functionality
    is available as :func:`randomtable`.
        
    """
    
    return DummyTable(numrows=numrows, fields=fields, wait=wait)


class DummyTable(RowContainer):
    
    def __init__(self, numrows=100, fields=None, wait=0):
        """Record the table settings and take a seed from the current time.

        `fields`, when given, is copied into a new ``OrderedDict``; when it
        is ``None`` the table starts with an empty one.

        """
        self.numrows = numrows
        self.wait = wait
        # Always build a new ordered mapping so the caller's object is never
        # shared; an empty tuple stands in for "no fields supplied".
        self.fields = OrderedDict(() if fields is None else fields)
        self.seed = datetime.datetime.now()

    def __setitem__(self, item, value):
        # Add or replace a field; the key is always coerced to str, so
        # non-string keys are stored under their string form.
        self.fields[str(item)] = value
            
    def __iter__(self):
        nr = self.numrows
        seed = self.seed