How to use the dataflows.processors.dumpers.dumper_base.DumperBase class in dataflows

To help you get started, we’ve selected a few dataflows examples, based on popular ways it is used in public projects.

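For orientation, here is a minimal sketch of how a DumperBase-backed processor is normally reached: through a bundled dumper such as dump_to_path, added as the last step of a flow (the inline data and output directory below are arbitrary).

from dataflows import Flow, dump_to_path

# Running the flow invokes the dumper, which writes the datapackage
# to disk and records stats such as row counts, byte sizes and hashes.
Flow(
    [{'a': 1}, {'a': 2}],
    dump_to_path('out')  # 'out' is an arbitrary output directory
).process()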

github datahq / dataflows / dataflows / processors / dumpers / dumper_base.py
    # Copy the aggregate counters collected on the datapackage
    # descriptor into the processor's stats dict.
    def handle_datapackage(self):
        self.datapackage.commit()
        self.stats['count_of_rows'] = DumperBase.get_attr(self.datapackage.descriptor, self.datapackage_rowcount)
        self.stats['bytes'] = DumperBase.get_attr(self.datapackage.descriptor, self.datapackage_bytes)
        self.stats['hash'] = DumperBase.get_attr(self.datapackage.descriptor, self.datapackage_hash)
        self.stats['dataset_name'] = self.datapackage.descriptor.get('name')
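handle_datapackage runs after the datapackage descriptor is committed, copying the aggregate counters into self.stats. A subclass can extend it to act on those stats; the LoggingDumper below is a hypothetical sketch, not part of the library.

from dataflows.processors.dumpers.dumper_base import DumperBase

class LoggingDumper(DumperBase):
    # Hypothetical subclass: let the base class collect its stats,
    # then report them.
    def handle_datapackage(self):
        super().handle_datapackage()
        print('dumped %s rows from %s' % (
            self.stats.get('count_of_rows'),
            self.stats.get('dataset_name')))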
github datahq / dataflows / dataflows / processors / dumpers / file_dumper.py
        resource_descriptor = resource.res.descriptor
        for descriptor in self.datapackage.descriptor['resources']:
            if descriptor['name'] == resource.res.descriptor['name']:
                resource_descriptor = descriptor

        # File size:
        filesize = temp_file.tell()
        DumperBase.inc_attr(self.datapackage.descriptor, self.datapackage_bytes, filesize)
        DumperBase.inc_attr(resource_descriptor, self.resource_bytes, filesize)

        # File Hash:
        if self.resource_hash:
            hasher = FileDumper.hash_handler(temp_file)
            # Update path with hash
            if self.add_filehash_to_path:
                DumperBase.insert_hash_in_path(resource_descriptor, hasher.hexdigest())
            DumperBase.set_attr(resource_descriptor, self.resource_hash, hasher.hexdigest())

        # Finalise
        filename = temp_file.name
        temp_file.close()
        self.write_file_to_output(filename, resource.res.source)
        os.unlink(filename)
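The size and hash bookkeeping above is plain hashlib plumbing: measure the temp file, then fold its digest into the descriptor. A standalone sketch of the same hashing pattern (the helper name is ours, not the library's):

import hashlib

# Hypothetical helper mirroring FileDumper's hashing step: stream the
# dumped file through md5 and return the hex digest that would be
# stored in the resource descriptor.
def file_md5(path, blocksize=1024 * 1024):
    hasher = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(blocksize), b''):
            hasher.update(chunk)
    return hasher.hexdigest()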
github datahq / dataflows / dataflows / processors / dumpers / to_sql.py
    elif isinstance(obj, decimal.Decimal):
        return float(obj)
    elif isinstance(obj, (list, set)):
        return [strize(x) for x in obj]
    elif obj is None:
        return None
    assert False, "Don't know how to handle object %r" % obj


OBJECT_FIXERS = {
    'sqlite': [strize, jsonize],
    'postgresql': [strize]
}


class SQLDumper(DumperBase):

    def __init__(self,
                 tables,
                 engine='env://DATAFLOWS_DB_ENGINE',
                 updated_column=None, updated_id_column=None,
                 **options):
        super(SQLDumper, self).__init__(options)
        table_to_resource = tables

        if isinstance(engine, str):
            if engine.startswith('env://'):
                env_var = engine[6:]
                engine = os.environ.get(env_var)
                if engine is None:
                    raise ValueError("Couldn't connect to DB - "
                                     "Please set your '%s' environment variable" % env_var)
github datahq / dataflows / dataflows / processors / dumpers / file_dumper.py
import os
import json
import tempfile
import hashlib

from datapackage import Resource

from .dumper_base import DumperBase
from .file_formats import CSVFormat, JSONFormat


class FileDumper(DumperBase):

    def __init__(self, options):
        super(FileDumper, self).__init__(options)
        self.force_format = options.get('force_format', True)
        self.forced_format = options.get('format', 'csv')
        self.temporal_format_property = options.get('temporal_format_property', None)
        self.use_titles = options.get('use_titles', False)

    def process_datapackage(self, datapackage):
        datapackage = \
            super(FileDumper, self).process_datapackage(datapackage)

        self.file_formatters = {}

        # Make sure all resources are proper CSVs
        resource: Resource = None
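These constructor options are normally supplied through dump_to_path, which forwards its keyword arguments to FileDumper. For example, forcing JSON output (a sketch, assuming the options are passed through unchanged):

from dataflows import Flow, dump_to_path

Flow(
    [{'a': 1}],
    # 'force_format' and 'format' map to the options read above.
    dump_to_path('out', force_format=True, format='json')
).process()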
github datahq / dataflows / dataflows / processors / dumpers / dumper_base.py
schema_validator(resource.res, resource,
                                             **self.schema_validator_options)
                        )
            )
            ret = self.row_counter(resource, ret)
            yield ret

        # Calculate datapackage hash
        if self.datapackage_hash:
            datapackage_hash = hashlib.md5(
                        json.dumps(self.datapackage.descriptor,
                                   indent=2 if self.pretty_descriptor else None,
                                   sort_keys=True,
                                   ensure_ascii=True).encode('ascii')
                    ).hexdigest()
            DumperBase.set_attr(self.datapackage.descriptor, self.datapackage_hash, datapackage_hash)

        self.handle_datapackage()
        self.finalize()
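The datapackage hash above is simply an md5 over a canonical JSON serialization of the descriptor (sorted keys, ASCII-only). The same recipe in isolation, with a made-up descriptor:

import hashlib
import json

descriptor = {'name': 'example', 'resources': []}  # made-up descriptor
digest = hashlib.md5(
    json.dumps(descriptor, indent=None, sort_keys=True,
               ensure_ascii=True).encode('ascii')
).hexdigest()
print(digest)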
github datahq / dataflows / dataflows / processors / dumpers / dumper_base.py
    def __init__(self, options={}):
        super(DumperBase, self).__init__()
        counters = options.get('counters', {})
        # Descriptor paths under which row counts, byte sizes and hashes
        # are recorded; each can be remapped via the 'counters' option.
        self.datapackage_rowcount = counters.get('datapackage-rowcount', 'count_of_rows')
        self.datapackage_bytes = counters.get('datapackage-bytes', 'bytes')
        self.datapackage_hash = counters.get('datapackage-hash', 'hash')
        self.resource_rowcount = counters.get('resource-rowcount', 'count_of_rows')
        self.resource_bytes = counters.get('resource-bytes', 'bytes')
        self.resource_hash = counters.get('resource-hash', 'hash')
        self.add_filehash_to_path = options.get('add_filehash_to_path', False)
        self.pretty_descriptor = options.get('pretty_descriptor', True)
        self.schema_validator_options = options.get('validator_options', {})
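Because every counter name is looked up in options, callers can remap where the dumper records its stats in the descriptor. A sketch, assuming a concrete dumper such as dump_to_path forwards the 'counters' option to DumperBase unchanged:

from dataflows import Flow, dump_to_path

Flow(
    [{'a': 1}],
    # Record the datapackage hash under 'my_hash' instead of 'hash'.
    dump_to_path('out', counters={'datapackage-hash': 'my_hash'})
).process()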