How to use the csvkit.cli.CSVKitUtility class in csvkit

To help you get started, we’ve selected a few csvkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / csvkit / csvkit / utilities / sql2csv.py View on Github external
#!/usr/bin/env python

import agate
from sqlalchemy import create_engine

from csvkit.cli import CSVKitUtility


class SQL2CSV(CSVKitUtility):
    """Command-line utility that runs an SQL query and emits the result as CSV."""

    description = 'Execute an SQL query on a database and output the result to a CSV file.'
    # Overrides all flags except --linenumbers, --verbose, --version.
    override_flags = 'f,b,d,e,H,K,L,p,q,S,t,u,z,blanks,date-format,datetime-format,zero'.split(',')

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        register = self.argparser.add_argument

        # Target database; falls back to 'sqlite://' when --db is not given.
        register(
            '--db',
            dest='connection_string',
            default='sqlite://',
            help='An sqlalchemy connection string to connect to a database.',
        )
        # Optional positional argument: a file holding the query text.
        register(
            metavar='FILE',
            nargs='?',
            dest='input_path',
            help='The file to use as SQL query. If both FILE and QUERY are omitted, query will be read as piped data via STDIN.',
        )
        # Inline query text.
        register(
            '--query',
            help="The SQL query to execute. If specified, it overrides FILE and STDIN.",
        )
        register(
            '-e', '--encoding',
            dest='encoding',
            default='utf-8',
            help='Specify the encoding of the input query file.',
        )
        register(
            '-H', '--no-header-row',
            dest='no_header_row',
            action='store_true',
            help='Do not output column names.',
        )
github metagriffin / csvsed / csvsed / cli.py View on Github external
# You should have received a copy of the GNU General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
#------------------------------------------------------------------------------

'''
Command-line interface to `csvsed.sed`.
'''

import re, sys

from csvkit import CSVKitReader, CSVKitWriter
from csvkit.cli import CSVKitUtility, CSVFileType, parse_column_identifiers
from csvsed import sed

#------------------------------------------------------------------------------
class CsvSed(CSVKitUtility):
  '''
  Command-line front end for applying sed-style modifications to CSV data.
  '''

  # The two literals are joined by implicit concatenation, so only the first
  # one carries the separating space (otherwise the help text shows a
  # doubled space after "Like a").
  description = 'A stream-oriented CSV modification tool. Like a ' \
      'stripped-down "sed" command, but for tabular data.'
  override_flags = 'f'

  #----------------------------------------------------------------------------
  def add_arguments(self):
    '''
    Register the options specific to this utility on `self.argparser`.
    '''
    self.argparser.add_argument(
      '-c', '--columns',
      dest='columns',
      help='A comma separated list of column indices or names to be modified.')
    # todo: support in-place file modification
    # todo: make sure that it supports backup spec, eg '-i.orig'
    # self.argparser.add_argument(
    #   '-i', '--in-place',
    #   dest='inplace',
github wireservice / csvkit / csvkit / utilities / csvgrep.py View on Github external
#!/usr/bin/env python

import re
from argparse import FileType

import agate

from csvkit.cli import CSVKitUtility
from csvkit.grep import FilteringCSVReader


class CSVGrep(CSVKitUtility):
    """Command-line utility for searching CSV files, in the spirit of ``grep``."""

    description = 'Search CSV files. Like the Unix "grep" command, but for tabular data.'
    override_flags = ['L', 'blanks', 'date-format', 'datetime-format']

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        option = self.argparser.add_argument

        option(
            '-n', '--names',
            dest='names_only',
            action='store_true',
            help='Display column names and indices from the input CSV and exit.',
        )
        option(
            '-c', '--columns',
            dest='columns',
            help='A comma separated list of column indices or names to be searched.',
        )
        # Three alternative ways of supplying what to look for:
        # a literal string, a regular expression, or a file of values.
        option(
            '-m', '--match',
            dest="pattern",
            action='store',
            help='The string to search for.',
        )
        option(
            '-r', '--regex',
            dest='regex',
            action='store',
            help='If specified, must be followed by a regular expression which will be tested against the specified columns.',
        )
        # argparse opens the given path for reading via FileType('r').
        option(
            '-f', '--file',
            dest='matchfile',
            type=FileType('r'),
            action='store',
            help='If specified, must be the path to a file. For each tested row, if any line in the file (stripped of line separators) is an exact match for the cell value, the row will pass.',
        )
        option(
            '-i', '--invert-match',
            dest='inverse',
            action='store_true',
            help='If specified, select non-matching instead of matching rows.',
        )
github wireservice / csvkit / csvkit / utilities / csvcut.py View on Github external
csvcut is originally the work of eminent hackers Joe Germuska and Aaron Bycoffe.

This code is forked from:
https://gist.github.com/561347/9846ebf8d0a69b06681da9255ffe3d3f59ec2c97

Used and modified with permission.
"""

import sys

import agate

from csvkit.cli import CSVKitUtility


class CSVCut(CSVKitUtility):
    """Command-line utility for selecting columns of a CSV file, in the spirit of ``cut``."""

    description = 'Filter and truncate CSV files. Like the Unix "cut" command, but for tabular data.'
    override_flags = ['L', 'blanks', 'date-format', 'datetime-format']

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        # Each entry pairs the option strings with the keyword arguments
        # handed to add_argument; entries are registered in the order listed.
        option_table = (
            (('-n', '--names'), {
                'dest': 'names_only',
                'action': 'store_true',
                'help': 'Display column names and indices from the input CSV and exit.',
            }),
            (('-c', '--columns'), {
                'dest': 'columns',
                'help': 'A comma separated list of column indices, names or ranges to be extracted, e.g. "1,id,3-5". Defaults to all columns.',
            }),
            (('-C', '--not-columns'), {
                'dest': 'not_columns',
                'help': 'A comma separated list of column indices, names or ranges to be excluded, e.g. "1,id,3-5". Defaults to no columns.',
            }),
            (('-x', '--delete-empty-rows'), {
                'dest': 'delete_empty',
                'action': 'store_true',
                'help': 'After cutting, delete rows which are completely empty.',
            }),
        )
        for flags, settings in option_table:
            self.argparser.add_argument(*flags, **settings)

    def main(self):
        if self.args.names_only:
            self.print_column_names()
github wireservice / csvkit / csvkit / utilities / csvclean.py View on Github external
#!/usr/bin/env python

import sys
from os.path import splitext

import agate

from csvkit.cli import CSVKitUtility
from csvkit.cleanup import RowChecker


class CSVClean(CSVKitUtility):
    """Command-line utility for fixing common errors in a CSV file."""

    description = 'Fix common errors in a CSV file.'
    override_flags = ['L', 'blanks', 'date-format', 'datetime-format']

    def add_arguments(self):
        """Add the ``-n``/``--dry-run`` switch, stored under the ``dryrun`` destination."""
        self.argparser.add_argument(
            '-n', '--dry-run',
            dest='dryrun',
            action='store_true',
            help='Do not create output files. Information about what would have been done will be printed to STDERR.',
        )

    def main(self):
        if self.additional_input_expected():
            sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')

        reader = agate.csv.reader(self.skip_lines(), **self.reader_kwargs)

        if self.args.dryrun:
            checker = RowChecker(reader)
github wireservice / csvkit / csvkit / utilities / csvsql.py View on Github external
#!/usr/bin/env python

import os.path
import sys
from pkg_resources import iter_entry_points

import agate
import agatesql  # noqa
from sqlalchemy import create_engine, dialects

from csvkit.cli import CSVKitUtility

# Dialect names offered as choices for --dialect: everything listed in
# sqlalchemy.dialects.__all__, plus the name of every entry point registered
# under the 'sqlalchemy.dialects' group.
DIALECTS = dialects.__all__ + tuple(e.name for e in iter_entry_points('sqlalchemy.dialects'))


class CSVSQL(CSVKitUtility):
    """Command-line utility that turns CSV files into SQL statements and/or runs SQL against them."""

    description = 'Generate SQL statements for one or more CSV files, or execute those statements directly on a database, and execute one or more SQL queries.'
    # Override 'f' because the utility accepts multiple files.
    override_flags = ['f']

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        # Zero or more input paths; the default ['-'] applies when none are given.
        self.argparser.add_argument(metavar='FILE', nargs='*', dest='input_paths', default=['-'],
                                    help='The CSV file(s) to operate on. If omitted, will accept input as piped data via STDIN.')
        # Restricted to the names collected in DIALECTS.
        self.argparser.add_argument('-i', '--dialect', dest='dialect', choices=DIALECTS,
                                    help='Dialect of SQL to generate. Only valid when --db is not specified.')
        self.argparser.add_argument('--db', dest='connection_string',
                                    help='If present, a SQLAlchemy connection string to use to directly execute generated SQL on a database.')
        self.argparser.add_argument('--query',
                                    help='Execute one or more SQL queries delimited by ";" and output the result of the last query as CSV. QUERY may be a filename.')
        self.argparser.add_argument('--insert', dest='insert', action='store_true',
                                    help='In addition to creating the table, also insert the data into the table. Only valid when --db is specified.')
        self.argparser.add_argument('--prefix', action='append', default=[],
github wireservice / csvkit / csvkit / utilities / csvjoin.py View on Github external
#!/usr/bin/env python

import agate

from csvkit.cli import CSVKitUtility, match_column_identifier


class CSVJoin(CSVKitUtility):
    """Command-line utility that merges CSV files with SQL-like joins."""

    description = 'Execute a SQL-like join to merge CSV files on a specified column or columns.'
    epilog = 'Note that the join operation requires reading all files into memory. Don\'t try this on very large files.'
    # Override 'f' because the utility accepts multiple files.
    override_flags = ['f']

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        declare = self.argparser.add_argument

        # Zero or more input paths; the default ['-'] applies when none are given.
        declare(
            metavar='FILE',
            nargs='*',
            dest='input_paths',
            default=['-'],
            help='The CSV files to operate on. If only one is specified, it will be copied to STDOUT.',
        )
        declare(
            '-c', '--columns',
            dest='columns',
            help='The column name(s) on which to join. Should be either one name (or index) or a comma-separated list with one name (or index) for each file, in the same order that the files were specified. May also be left unspecified, in which case the two files will be joined sequentially without performing any matching.',
        )
        # Join-type switches; with none of them set the help text describes
        # an inner join as the default.
        declare(
            '--outer',
            dest='outer_join',
            action='store_true',
            help='Perform a full outer join, rather than the default inner join.',
        )
        declare(
            '--left',
            dest='left_join',
            action='store_true',
            help='Perform a left outer join, rather than the default inner join. If more than two files are provided this will be executed as a sequence of left outer joins, starting at the left.',
        )
        declare(
            '--right',
            dest='right_join',
            action='store_true',
            help='Perform a right outer join, rather than the default inner join. If more than two files are provided this will be executed as a sequence of right outer joins, starting at the right.',
        )
github wireservice / csvkit / csvkit / utilities / csvstat.py View on Github external
('stdev', {
        'aggregation': agate.StDev,
        'label': 'StDev: '
    }),
    ('len', {
        'aggregation': agate.MaxLength,
        'label': 'Longest value: '
    }),
    ('freq', {
        'aggregation': None,
        'label': 'Most common values: '
    })
])


class CSVStat(CSVKitUtility):
    """Command-line utility that prints descriptive statistics per CSV column."""

    description = 'Print descriptive statistics for each column in a CSV file.'
    override_flags = ['L', 'blanks', 'date-format', 'datetime-format']

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        add_option = self.argparser.add_argument

        add_option(
            '--csv',
            dest='csv_output',
            action='store_true',
            help='Output results as a CSV, rather than text.',
        )
        add_option(
            '-n', '--names',
            dest='names_only',
            action='store_true',
            help='Display column names and indices from the input CSV and exit.',
        )
        add_option(
            '-c', '--columns',
            dest='columns',
            help='A comma separated list of column indices, names or ranges to be examined, e.g. "1,id,3-5". Defaults to all columns.',
        )
        # Switches that narrow the report to a single statistic.
        add_option(
            '--type',
            dest='type_only',
            action='store_true',
            help='Only output data type.',
        )
        add_option(
            '--nulls',
            dest='nulls_only',
            action='store_true',
            help='Only output whether columns contains nulls.',
        )
        add_option(
            '--unique',
            dest='unique_only',
            action='store_true',
            help='Only output counts of unique values.',
        )
github wireservice / csvkit / csvkit / utilities / in2csv.py View on Github external
import agate
import agatedbf  # noqa
import agateexcel  # noqa
import openpyxl
import six
import xlrd

from csvkit import convert
from csvkit.convert.fixed import fixed2csv
from csvkit.convert.geojs import geojson2csv
from csvkit.cli import CSVKitUtility

# Names of the input formats this module knows about.
# NOTE(review): presumably the accepted values for -f/--format — confirm
# against the argument definition.
SUPPORTED_FORMATS = ['csv', 'dbf', 'fixed', 'geojson', 'json', 'ndjson', 'xls', 'xlsx']


class In2CSV(CSVKitUtility):
    """Command-line utility that converts other tabular data formats to CSV."""

    description = 'Convert common, but less awesome, tabular data formats to CSV.'
    epilog = 'Some command-line flags only pertain to specific input formats.'
    # The utility handles the input file.
    override_flags = ['f']

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        # I feel that there ought to be a better way to do this across Python 2 and 3.
        def option_parser(bytestring):
            """Decode with the filesystem encoding on Python 2; pass through unchanged otherwise."""
            return bytestring.decode(sys.getfilesystemencoding()) if six.PY2 else bytestring

        # Optional positional argument naming the input file.
        self.argparser.add_argument(
            metavar='FILE',
            nargs='?',
            dest='input_path',
            help='The CSV file to operate on. If omitted, will accept input as piped data via STDIN.',
        )
        self.argparser.add_argument('-f', '--format', dest='filetype',
github wireservice / csvkit / csvkit / utilities / csvlook.py View on Github external
#!/usr/bin/env python

import agate

from csvkit.cli import CSVKitUtility


class CSVLook(CSVKitUtility):
    """Command-line utility that renders a CSV file as a fixed-width table."""

    description = 'Render a CSV file in the console as a Markdown-compatible, fixed-width table.'
    # NOTE(review): semantics are defined by CSVKitUtility; presumably signals
    # that input is read in full before processing — confirm in csvkit.cli.
    buffers_input = True

    def add_arguments(self):
        """Declare this utility's own options on ``self.argparser``."""
        # Each entry pairs the option strings with the keyword arguments
        # handed to add_argument; entries are registered in the order listed.
        option_table = (
            (('--max-rows',), {
                'dest': 'max_rows',
                'type': int,
                'help': 'The maximum number of rows to display before truncating the data.',
            }),
            (('--max-columns',), {
                'dest': 'max_columns',
                'type': int,
                'help': 'The maximum number of columns to display before truncating the data.',
            }),
            (('--max-column-width',), {
                'dest': 'max_column_width',
                'type': int,
                'help': 'Truncate all columns to at most this width. The remainder will be replaced with ellipsis.',
            }),
            (('-y', '--snifflimit'), {
                'dest': 'sniff_limit',
                'type': int,
                'help': 'Limit CSV dialect sniffing to the specified number of bytes. Specify "0" to disable sniffing entirely.',
            }),
            (('-I', '--no-inference'), {
                'dest': 'no_inference',
                'action': 'store_true',
                'help': 'Disable type inference when parsing the input.',
            }),
        )
        for flags, settings in option_table:
            self.argparser.add_argument(*flags, **settings)

    def main(self):