How to use the datalab.bigquery.Query class in datalab

To help you get started, we’ve selected a few datalab examples, based on popular ways datalab.bigquery.Query is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github googledatalab / pydatalab / legacy_tests / bigquery / udf_tests.py View on Github external
def test_sql_building(self):
    """Check the SQL and JavaScript produced for a query that applies a UDF to a table.

    A query of the form 'SELECT * FROM foo($t)' is built with a table bound to
    ``t`` and a single UDF; the test then compares the expanded ``query.sql``
    and the UDF's ``_code`` against the expected text.
    """
    ctx = self._create_context()
    source_table = datalab.bigquery.Table('test:requestlogs.today', context=ctx)

    foo_udf = self._create_udf()
    built_query = datalab.bigquery.Query(
        'SELECT * FROM foo($t)', t=source_table, udfs=[foo_udf], context=ctx)

    # JavaScript expected in the UDF: the function body followed by its
    # bigquery.defineFunction registration.
    expected_js = (
        '\nfoo=function(r,emit) { emit({output1: r.field2, output2: r.field1 }); };\n'
        'bigquery.defineFunction(\'foo\', ["field1", "field2"], '
        '[{"name": "output1", "type": "integer"}, '
        '{"name": "output2", "type": "string"}], foo);'
    )
    # SQL expected once the $t placeholder and the UDF call have been expanded.
    expected_sql = (
        'SELECT * FROM '
        '(SELECT output1, output2 FROM foo([test:requestlogs.today]))'
    )

    self.assertEqual(built_query.sql, expected_sql)
    self.assertEqual(foo_udf._code, expected_js)
github googledatalab / pydatalab / legacy_tests / bigquery / query_tests.py View on Github external
def _create_query(sql=None):
    """Build a datalab.bigquery.Query for use in tests.

    Args:
      sql: the SQL text for the query; when None, the placeholder
          'SELECT * ...' is used instead.
    Returns:
      A datalab.bigquery.Query created with the context returned by
      TestCases._create_context().
    """
    query_text = 'SELECT * ...' if sql is None else sql
    return datalab.bigquery.Query(query_text, context=TestCases._create_context())
github googledatalab / pydatalab / datalab / ml / _feature_slice_view.py View on Github external
Args:
      data: Can be one of:
            A string of sql query.
            A sql query module defined by "%%sql --module module_name".
            A pandas DataFrame.
          Regardless of data type, it must include the following columns:
            "feature": identifies a slice of features. For example: "petal_length:4.0-4.2".
            "count": number of instances in that slice of features.
          All other columns are viewed as metrics for its feature slice. At least one is required.
    """    
    import IPython

    if isinstance(data, ModuleType) or isinstance(data, basestring):
      item, _ = datalab.data.SqlModule.get_sql_statement_with_environment(data, {})
      query = datalab.bigquery.Query(item)
      df = query.results().to_dataframe()
      data = self._get_lantern_format(df)
    elif isinstance(data, pd.core.frame.DataFrame):
      data = self._get_lantern_format(data)
    else:
      raise Exception('data needs to be a sql query, or a pandas DataFrame.')
      
    HTML_TEMPLATE = """
github googledatalab / pydatalab / datalab / data / _csv.py View on Github external
def _get_gcs_csv_row_count(self, federated_table):
    """Return the row count of a federated table via a BigQuery count(*) query.

    Args:
      federated_table: the data source bound to the name 'data' in the query.
    Returns:
      The first value of the first result row, i.e. the count(*) column.
    """
    import datalab.bigquery as bq
    results = bq.Query('SELECT count(*) from data',
                       data_sources={'data': federated_table}).results()
    # NOTE(review): the row is presumably dict-like - confirm against the results
    # table type. On Python 3 dict.values() is a view that cannot be indexed, so
    # materialize it before taking the single count(*) value. list(...)[0] gives
    # the same result on Python 2 and for any plain sequence.
    return list(results[0].values())[0]
github googledatalab / pydatalab / datalab / bigquery / commands / _bigquery.py View on Github external
args: the dictionary of magic arguments.
    cell: the cell contents which can be variable value overrides (if args has a 'query'
        value) or inline SQL otherwise.
    env: a dictionary that is used for looking up variable values.
  Returns:
    A Query object.
  """
  sql_arg = args.get('query', None)
  if sql_arg is None:
    # Assume we have inline SQL in the cell
    if not isinstance(cell, basestring):
      raise Exception('Expected a --query argument or inline SQL')
    return datalab.bigquery.Query(cell, values=env)

  item = datalab.utils.commands.get_notebook_item(sql_arg)
  if isinstance(item, datalab.bigquery.Query):  # Queries are already expanded.
    return item

  # Create an expanded BQ Query.
  config = datalab.utils.commands.parse_config(cell, env)
  item, env = datalab.data.SqlModule.get_sql_statement_with_environment(item, config)
  if cell:
    env.update(config)  # config is both a fallback and an override.
  return datalab.bigquery.Query(item, values=env)
github googledatalab / pydatalab / datalab / data / commands / _sql.py View on Github external
DEFINE QUERY 

  on a line by itself.

  Args:
    args: the optional arguments following '%%sql'.
    cell: the contents of the cell; Python code for arguments followed by SQL queries.
  """
  name = args['module'] if args['module'] else '_sql_cell'
  module = imp.new_module(name)
  query = _split_cell(cell, module)
  ipy = IPython.get_ipython()
  if not args['module']:
      # Execute now
      if query:
        return datalab.bigquery.Query(query, values=ipy.user_ns) \
          .execute(dialect=args['dialect'], billing_tier=args['billing']).results
  else:
    # Add it as a module
    sys.modules[name] = module
    exec('import %s' % name, ipy.user_ns)