How to use the datalab.data module in datalab

To help you get started, we’ve selected a few examples of datalab.data, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github googledatalab / pydatalab / datalab / utils / commands / _csv.py View on Github external
def _view(args, cell):
  """Show a preview of a CSV source as an HTML table or as a profile.

  Args:
    args: mapping of parsed command arguments. Reads 'input' (passed to
        datalab.data.Csv), 'count' (number of lines to browse; 5 is used when
        the value is falsy) and 'profile' (selects the profile output).
    cell: optional cell body. When non-empty it is parsed with
        _utils.parse_config against the IPython user namespace; a 'columns'
        entry, if present, is split on commas to supply the header names.

  Returns:
    The result of _utils.profile_df for the browsed data when args['profile']
    is truthy, otherwise an IPython HTML object wrapping the DataFrame
    rendered without its index.
  """
  reader = datalab.data.Csv(args['input'])
  row_limit = int(args['count'] or 5)

  # Header names stay None unless the cell config explicitly lists columns.
  column_names = None
  if cell:
    shell = IPython.get_ipython()
    settings = _utils.parse_config(cell, shell.user_ns)
    if 'columns' in settings:
      column_names = [name.strip() for name in settings['columns'].split(',')]

  frame = pd.DataFrame(reader.browse(row_limit, column_names))

  if not args['profile']:
    return IPython.core.display.HTML(frame.to_html(index=False))

  # TODO(gram): We need to generate a schema and type-convert the columns before this
  # will be useful for CSV
  return _utils.profile_df(frame)
github googledatalab / pydatalab / datalab / utils / commands / _utils.py View on Github external
first_row/count parameters).

  Raises:
    Exception if the request could not be fulfilled.
  """

  ipy = IPython.get_ipython()
  if env is None:
    env = {}
  env.update(ipy.user_ns)
  if isinstance(source, basestring):
    source = datalab.utils.get_item(ipy.user_ns, source, source)
    if isinstance(source, basestring):
      source = datalab.bigquery.Table(source)

  if isinstance(source, types.ModuleType) or isinstance(source, datalab.data.SqlStatement):
    source = datalab.bigquery.Query(source, values=env)

  if isinstance(source, list):
    if len(source) == 0:
      return _get_data_from_empty_list(source, fields, first_row, count, schema)
    elif isinstance(source[0], dict):
      return _get_data_from_list_of_dicts(source, fields, first_row, count, schema)
    elif isinstance(source[0], list):
      return _get_data_from_list_of_lists(source, fields, first_row, count, schema)
    else:
      raise Exception("To get tabular data from a list it must contain dictionaries or lists.")
  elif isinstance(source, pandas.DataFrame):
    return _get_data_from_dataframe(source, fields, first_row, count, schema)
  elif (isinstance(source, google.datalab.bigquery.Query) or
        isinstance(source, google.datalab.bigquery.Table)):
    return google.datalab.utils.commands._utils.get_data(
github googledatalab / pydatalab / datalab / data / commands / _sql.py View on Github external
else:
      define_wild_match = define_wild_re.match(line)
      if define_wild_match:
        raise Exception('Expected "DEFINE QUERY "')

  if last_def >= 0:
    # We were in a query so save this tail query.
    query = '\n'.join([line for line in lines[last_def:] if len(line)]).strip()
    statement = datalab.data.SqlStatement(query, module)
    module.__dict__[name] = statement
    module.__dict__[datalab.data._utils._SQL_MODULE_LAST] = statement

  if code is None:
    code = ''
  module.__dict__[datalab.data._utils._SQL_MODULE_ARGPARSE] = _arguments(code, module)
  return module.__dict__.get(datalab.data._utils._SQL_MODULE_LAST, None)
github googledatalab / pydatalab / datalab / bigquery / _query.py View on Github external
context = datalab.context.Context.default()
    self._context = context
    self._api = _api.Api(context)
    self._data_sources = data_sources
    self._udfs = udfs

    if data_sources is None:
      data_sources = {}

    self._results = None
    self._code = None
    self._imports = []
    if values is None:
      values = kwargs

    self._sql = datalab.data.SqlModule.expand(sql, values)

    # We need to take care not to include the same UDF code twice so we use sets.
    udfs = set(udfs if udfs else [])
    for value in list(values.values()):
      if isinstance(value, _udf.UDF):
        udfs.add(value)
    included_udfs = set([])

    tokens = datalab.data.tokenize(self._sql)
    udf_dict = {udf.name: udf for udf in udfs}

    for i, token in enumerate(tokens):
      # Find the preceding and following non-whitespace tokens
      prior = i - 1
      while prior >= 0 and tokens[prior].isspace():
        prior -= 1
github googledatalab / pydatalab / datalab / data / commands / _sql.py View on Github external
if define_match or select_match or standard_sql_match:
      # If this is the first query, get the preceding Python code.
      if code is None:
        code = ('\n'.join(lines[:i])).strip()
        if len(code):
          code += '\n'
      elif last_def >= 0:

        # This is not the first query, so gather the previous query text.
        query = '\n'.join([line for line in lines[last_def:i] if len(line)]).strip()
        if select_match and name != datalab.data._utils._SQL_MODULE_MAIN and len(query) == 0:
          # Avoid DEFINE query name\nSELECT ... being seen as an empty DEFINE followed by SELECT
          continue

        # Save the query
        statement = datalab.data.SqlStatement(query, module)
        module.__dict__[name] = statement
        # And set the 'last' query to be this too
        module.__dict__[datalab.data._utils._SQL_MODULE_LAST] = statement

      # Get the query name and strip off our syntactic sugar if appropriate.
      if define_match:
        name = define_match.group(1)
        lines[i] = define_match.group(2)
      else:
        name = datalab.data._utils._SQL_MODULE_MAIN

      # Save the starting line index of the new query
      last_def = i
    else:
      define_wild_match = define_wild_re.match(line)
      if define_wild_match:
github googledatalab / pydatalab / datalab / data / commands / _sql.py View on Github external
name = define_match.group(1)
        lines[i] = define_match.group(2)
      else:
        name = datalab.data._utils._SQL_MODULE_MAIN

      # Save the starting line index of the new query
      last_def = i
    else:
      define_wild_match = define_wild_re.match(line)
      if define_wild_match:
        raise Exception('Expected "DEFINE QUERY "')

  if last_def >= 0:
    # We were in a query so save this tail query.
    query = '\n'.join([line for line in lines[last_def:] if len(line)]).strip()
    statement = datalab.data.SqlStatement(query, module)
    module.__dict__[name] = statement
    module.__dict__[datalab.data._utils._SQL_MODULE_LAST] = statement

  if code is None:
    code = ''
  module.__dict__[datalab.data._utils._SQL_MODULE_ARGPARSE] = _arguments(code, module)
  return module.__dict__.get(datalab.data._utils._SQL_MODULE_LAST, None)