Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if self.samplemethod == u'sample':
command = u'{}.sample(False, {})'.format(command, self.samplefraction)
if self.maxrows >= 0:
command = u'{}.take({})'.format(command, self.maxrows)
else:
command = u'{}.collect()'.format(command)
# Unicode support has improved in Python 3 so we don't need to encode.
if encode_result:
print_command = '{}.encode("{}")'.format(constants.LONG_RANDOM_VARIABLE_NAME,
conf.pyspark_sql_encoding())
else:
print_command = constants.LONG_RANDOM_VARIABLE_NAME
command = u'for {} in {}: print({})'.format(constants.LONG_RANDOM_VARIABLE_NAME,
command,
print_command)
return Command(command)
def _scala_command(self, sql_context_variable_name):
    """Build the Scala snippet that runs this query and prints its rows as JSON.

    The generated code calls ``.sql`` on the variable named by
    *sql_context_variable_name* with ``self.query`` wrapped in a Scala
    triple-quoted string, converts the result with ``.toJSON``, optionally
    samples it, fetches the rows, and prints each one with ``println``.

    :param sql_context_variable_name: name of the SQL context variable to
        reference in the generated Scala code.
    :return: a ``Command`` wrapping the generated Scala source.
    """
    scala_expr = u'{}.sql("""{}""").toJSON'.format(
        sql_context_variable_name, self.query)

    # Sampling is only added when explicitly requested; the literal `false`
    # is passed as the first argument of Spark's `sample`.
    if self.samplemethod == u'sample':
        scala_expr = u'{}.sample(false, {})'.format(
            scala_expr, self.samplefraction)

    # A non-negative maxrows caps the rows fetched; any other value
    # fetches the whole result with `collect`.
    row_limit = self.maxrows
    if row_limit >= 0:
        fetched = u'{}.take({})'.format(scala_expr, row_limit)
    else:
        fetched = u'{}.collect'.format(scala_expr)

    scala_source = u'{}.foreach(println)'.format(fetched)
    return Command(scala_source)
def _r_command(self, sql_context_variable_name):
    """Build the R snippet that runs this query and prints the result as JSON.

    When *sql_context_variable_name* is exactly ``'spark'`` the one-argument
    ``sql("<query>")`` form is generated; otherwise the context variable is
    passed as the first argument of ``sql``. The result is optionally
    sampled, fetched with ``take`` or ``collect``, converted with
    ``jsonlite:::toJSON`` and written out with ``cat`` inside a ``for`` loop.

    NOTE(review): ``self.query`` is placed inside a double-quoted R string
    without escaping, so the caller is presumably responsible for any
    embedded double quotes -- confirm against callers.

    :param sql_context_variable_name: name of the SQL context variable to
        reference in the generated R code.
    :return: a ``Command`` wrapping the generated R source.
    """
    uses_default_session = sql_context_variable_name == 'spark'
    if uses_default_session:
        r_expr = u'sql("{}")'.format(self.query)
    else:
        r_expr = u'sql({}, "{}")'.format(sql_context_variable_name, self.query)

    # Sampling is only added when explicitly requested.
    if self.samplemethod == u'sample':
        r_expr = u'sample({}, FALSE, {})'.format(r_expr, self.samplefraction)

    # A non-negative maxrows caps the rows fetched; any other value
    # fetches the whole result with `collect`.
    row_limit = self.maxrows
    if row_limit >= 0:
        r_expr = u'take({},{})'.format(r_expr, row_limit)
    else:
        r_expr = u'collect({})'.format(r_expr)

    as_json = u'jsonlite:::toJSON({})'.format(r_expr)

    # The same generated variable name is used as the loop variable and as
    # the argument to cat().
    loop_var = constants.LONG_RANDOM_VARIABLE_NAME
    r_source = u'for ({} in ({})) {{cat({})}}'.format(loop_var, as_json, loop_var)
    return Command(r_source)
def __init__(self, code, spark_events=None):
    """Create a command from a piece of source code.

    :param code: source text to store; common leading whitespace is removed
        with ``textwrap.dedent`` before it is kept on ``self.code``.
    :param spark_events: events object to keep on ``self._spark_events``;
        when ``None`` a fresh ``SparkEvents()`` is created.
    """
    super(Command, self).__init__()
    self.code = textwrap.dedent(code)
    self.logger = SparkLog(u"Command")
    # Only build a default events object when the caller did not supply one.
    self._spark_events = SparkEvents() if spark_events is None else spark_events
def _scala_command(self, sql_context_variable_name):
    """Return a ``Command`` holding Scala code that prints the query rows as JSON.

    The Scala expression is assembled step by step: run ``self.query``
    through ``.sql`` on the variable named by *sql_context_variable_name*,
    convert with ``.toJSON``, add ``.sample`` when ``self.samplemethod`` is
    ``'sample'``, fetch with ``.take`` or ``.collect`` depending on
    ``self.maxrows``, and finally print every element via
    ``.foreach(println)``.

    :param sql_context_variable_name: name of the SQL context variable to
        reference in the generated Scala code.
    :return: a ``Command`` wrapping the generated Scala source.
    """
    query_expr = u'{}.sql("""{}""").toJSON'.format(
        sql_context_variable_name, self.query)

    wants_sample = self.samplemethod == u'sample'
    if wants_sample:
        query_expr = u'{}.sample(false, {})'.format(
            query_expr, self.samplefraction)

    # take(n) is used for a non-negative maxrows; otherwise every row is
    # collected.
    if self.maxrows >= 0:
        query_expr = u'{}.take({})'.format(query_expr, self.maxrows)
    else:
        query_expr = u'{}.collect'.format(query_expr)

    return Command(u'{}.foreach(println)'.format(query_expr))
if self.samplemethod == u'sample':
command = u'{}.sample(False, {})'.format(command, self.samplefraction)
if self.maxrows >= 0:
command = u'{}.take({})'.format(command, self.maxrows)
else:
command = u'{}.collect()'.format(command)
# Unicode support has improved in Python 3 so we don't need to encode.
if encode_result:
print_command = '{}.encode("{}")'.format(constants.LONG_RANDOM_VARIABLE_NAME,
conf.pyspark_dataframe_encoding())
else:
print_command = constants.LONG_RANDOM_VARIABLE_NAME
command = u'for {} in {}: print({})'.format(constants.LONG_RANDOM_VARIABLE_NAME,
command,
print_command)
return Command(command)