How to use the datalab.bigquery.Schema.from_data function in datalab

To help you get started, we’ve selected a few datalab examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github googledatalab / pydatalab / legacy_tests / bigquery / schema_tests.py View on Github external
{'name': 'Column2', 'type': 'FLOAT'},
      {'name': 'Column3', 'type': 'BOOLEAN'},
      {'name': 'Column4', 'type': 'RECORD', 'fields': [
          {'name': 'Column1', 'type': 'STRING'},
          {'name': 'Column2', 'type': 'STRING'},
          {'name': 'Column3', 'type': 'RECORD', 'fields': [
              {'name': 'Column1', 'type': 'INTEGER'},
              {'name': 'Column2', 'type': 'RECORD', 'fields': []}
          ]}
      ]}
    ]

    schema_master = datalab.bigquery.Schema(master)

    with self.assertRaises(Exception) as error1:
      datalab.bigquery.Schema.from_data(variant1)
    if sys.version_info[0] == 3:
      self.assertEquals('Cannot create a schema from heterogeneous list [3, 2.0, True, ' +
                        '[\'cow\', \'horse\', [0, []]]]; perhaps you meant to use ' +
                        'Schema.from_record?', str(error1.exception))
    else:
      self.assertEquals('Cannot create a schema from heterogeneous list [3, 2.0, True, ' +
                        '[u\'cow\', u\'horse\', [0, []]]]; perhaps you meant to use ' +
                        'Schema.from_record?', str(error1.exception))
    with self.assertRaises(Exception) as error2:
      datalab.bigquery.Schema.from_data(variant2)
    if sys.version_info[0] == 3:
      self.assertEquals('Cannot create a schema from dict OrderedDict([(\'Column1\', 3), ' +
                        '(\'Column2\', 2.0), (\'Column3\', True), (\'Column4\', ' +
                        'OrderedDict([(\'Column1\', \'cow\'), (\'Column2\', \'horse\'), ' +
                        '(\'Column3\', OrderedDict([(\'Column1\', 0), (\'Column2\', ' +
                        'OrderedDict())]))]))]); perhaps you meant to use Schema.from_record?',
github googledatalab / pydatalab / legacy_tests / bigquery / schema_tests.py View on Github external
]

    schema_master = datalab.bigquery.Schema(master)

    with self.assertRaises(Exception) as error1:
      datalab.bigquery.Schema.from_data(variant1)
    if sys.version_info[0] == 3:
      self.assertEquals('Cannot create a schema from heterogeneous list [3, 2.0, True, ' +
                        '[\'cow\', \'horse\', [0, []]]]; perhaps you meant to use ' +
                        'Schema.from_record?', str(error1.exception))
    else:
      self.assertEquals('Cannot create a schema from heterogeneous list [3, 2.0, True, ' +
                        '[u\'cow\', u\'horse\', [0, []]]]; perhaps you meant to use ' +
                        'Schema.from_record?', str(error1.exception))
    with self.assertRaises(Exception) as error2:
      datalab.bigquery.Schema.from_data(variant2)
    if sys.version_info[0] == 3:
      self.assertEquals('Cannot create a schema from dict OrderedDict([(\'Column1\', 3), ' +
                        '(\'Column2\', 2.0), (\'Column3\', True), (\'Column4\', ' +
                        'OrderedDict([(\'Column1\', \'cow\'), (\'Column2\', \'horse\'), ' +
                        '(\'Column3\', OrderedDict([(\'Column1\', 0), (\'Column2\', ' +
                        'OrderedDict())]))]))]); perhaps you meant to use Schema.from_record?',
                        str(error2.exception))
    else:
      self.assertEquals('Cannot create a schema from dict OrderedDict([(u\'Column1\', 3), ' +
                        '(u\'Column2\', 2.0), (u\'Column3\', True), (u\'Column4\', ' +
                        'OrderedDict([(u\'Column1\', u\'cow\'), (u\'Column2\', u\'horse\'), ' +
                        '(u\'Column3\', OrderedDict([(u\'Column1\', 0), (u\'Column2\', ' +
                        'OrderedDict())]))]))]); perhaps you meant to use Schema.from_record?',
                        str(error2.exception))
    schema3 = datalab.bigquery.Schema.from_data([variant1])
    schema4 = datalab.bigquery.Schema.from_data([variant2])
github googledatalab / pydatalab / datalab / utils / commands / _utils.py View on Github external
def _get_data_from_dataframe(source, fields='*', first_row=0, count=-1, schema=None):
  """ Helper function for _get_data that handles Pandas DataFrames.

  Args:
    source: the Pandas DataFrame to read rows from.
    fields: field selection, passed through to get_field_list together with the schema.
    first_row: position (after the index is reset) of the first row to include.
    count: maximum number of rows to include; a negative value means no limit.
    schema: schema for the data; built from source with Schema.from_data when None.

  Returns:
    A tuple of (dict with 'cols' and 'rows' entries, total number of rows in source).
  """
  if schema is None:
    schema = datalab.bigquery.Schema.from_data(source)
  fields = get_field_list(fields, schema)

  # A negative count is treated as "up to the full length of the frame".
  row_limit = len(source.index) if count < 0 else count
  # Reset the index so the slice is positional regardless of the frame's own index.
  window = source.reset_index(drop=True)[first_row:first_row + row_limit]

  rows = []
  for _, series in window.iterrows():
    # Pandas Timestamps are converted to plain Python datetimes; other values pass through.
    record = {
        key: val.to_pydatetime() if isinstance(val, pandas.Timestamp) else val
        for key, val in series.to_dict().items()
    }
    # Fields missing from the record become empty cells.
    rows.append({'c': [{'v': record[c]} if c in record else {} for c in fields]})

  return {'cols': _get_cols(fields, schema), 'rows': rows}, len(source)
github googledatalab / pydatalab / datalab / utils / commands / _utils.py View on Github external
def _get_data_from_list_of_dicts(source, fields='*', first_row=0, count=-1, schema=None):
  """ Helper function for _get_data that handles lists of dicts.

  Args:
    source: the list of dicts, one dict per row.
    fields: field selection, passed through to get_field_list together with the schema.
    first_row: index of the first row of source to include.
    count: maximum number of rows to include; a negative value means every row from
        first_row onwards.
    schema: schema for the data; built from source with Schema.from_data when None.

  Returns:
    A tuple of (dict with 'cols' and 'rows' entries, total number of rows in source).
  """
  if schema is None:
    schema = datalab.bigquery.Schema.from_data(source)
  fields = get_field_list(fields, schema)
  # A negative count means "no limit", but first_row must still be honored (as the
  # DataFrame helper does); a stop of None slices through to the end of the list.
  last_row = first_row + count if count >= 0 else None
  # Fields missing from a row's dict become empty cells.
  rows = [{'c': [{'v': row[c]} if c in row else {} for c in fields]}
          for row in source[first_row:last_row]]
  return {'cols': _get_cols(fields, schema), 'rows': rows}, len(source)
github googledatalab / pydatalab / datalab / utils / commands / _utils.py View on Github external
def _get_data_from_list_of_lists(source, fields='*', first_row=0, count=-1, schema=None):
  """ Helper function for _get_data that handles lists of lists.

  Args:
    source: the list of rows, each row itself a list of values.
    fields: field selection, passed through to get_field_list together with the schema.
    first_row: index of the first row of source to include.
    count: maximum number of rows to include; a negative value means every row from
        first_row onwards.
    schema: schema for the data; built from source with Schema.from_data when None.

  Returns:
    A tuple of (dict with 'cols' and 'rows' entries, total number of rows in source).
  """
  if schema is None:
    schema = datalab.bigquery.Schema.from_data(source)
  fields = get_field_list(fields, schema)
  # A negative count means "no limit", but first_row must still be honored (as the
  # DataFrame helper does); a stop of None slices through to the end of the list.
  last_row = first_row + count if count >= 0 else None
  # The result of schema.find(name) is used as the positional index into each row.
  positions = [schema.find(name) for name in fields]
  rows = [{'c': [{'v': row[i]} for i in positions]} for row in source[first_row:last_row]]
  return {'cols': _get_cols(fields, schema), 'rows': rows}, len(source)