def _scala_command(self, spark_context_variable_name):
    # Build a Scala statement that serializes the named DataFrame to JSON,
    # optionally samples it, limits it to maxrows (or collects everything),
    # and prints one JSON document per line for the client to parse.
    command = u'{}.toJSON'.format(spark_context_variable_name)
    if self.samplemethod == u'sample':
        command = u'{}.sample(false, {})'.format(command, self.samplefraction)
    if self.maxrows >= 0:
        command = u'{}.take({})'.format(command, self.maxrows)
    else:
        command = u'{}.collect'.format(command)
    return Command(u'{}.foreach(println)'.format(command))
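# Illustrative result (assumed settings: samplemethod == u'sample',
# samplefraction == 0.1, maxrows == 100). For a Spark variable named `df`,
# the method above returns a Command wrapping this Scala statement:
#
#   df.toJSON.sample(false, 0.1).take(100).foreach(println)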
def _r_command(self, spark_context_variable_name):
    # Same idea for SparkR sessions: sample/take/collect the DataFrame,
    # serialize it with jsonlite, and cat() each JSON document to stdout.
    command = spark_context_variable_name
    if self.samplemethod == u'sample':
        command = u'sample({}, FALSE, {})'.format(command,
                                                  self.samplefraction)
    if self.maxrows >= 0:
        command = u'take({}, {})'.format(command, self.maxrows)
    else:
        command = u'collect({})'.format(command)
    command = u'jsonlite::toJSON({})'.format(command)
    command = u'for ({} in ({})) {{cat({})}}'.format(constants.LONG_RANDOM_VARIABLE_NAME,
                                                     command,
                                                     constants.LONG_RANDOM_VARIABLE_NAME)
    return Command(command)
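# Under the same assumed settings, the SparkR variant emits (with VAR standing
# in for constants.LONG_RANDOM_VARIABLE_NAME):
#
#   for (VAR in (jsonlite::toJSON(take(sample(df, FALSE, 0.1), 100)))) {cat(VAR)}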
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
from sparkmagic.livyclientlib.command import Command
from sparkmagic.livyclientlib.exceptions import BadUserDataException
import sparkmagic.utils.constants as constants
from abc import abstractmethod
class SendToSparkCommand(Command):
    def __init__(self, input_variable_name, input_variable_value, output_variable_name, spark_events=None):
        super(SendToSparkCommand, self).__init__("", spark_events)
        self.input_variable_name = input_variable_name
        self.input_variable_value = input_variable_value
        self.output_variable_name = output_variable_name

    def execute(self, session):
        # Translate the local variable into a session-kind-specific Command
        # and run it. (The original wrapped this in `except Exception as e:
        # raise e`, a no-op that only mangles the traceback, so it is dropped.)
        command = self.to_command(session.kind, self.input_variable_name,
                                  self.input_variable_value, self.output_variable_name)
        return command.execute(session)
    def to_command(self, kind, input_variable_name, input_variable_value, output_variable_name):
        # Dispatch on the Livy session kind; the Scala and R branches are
        # restored here (the snippet only showed the pyspark case, but the
        # BadUserDataException import and the _scala_command/_r_command hooks
        # below imply the full dispatch).
        if kind == constants.SESSION_KIND_PYSPARK:
            return self._pyspark_command(input_variable_name, input_variable_value, output_variable_name)
        elif kind == constants.SESSION_KIND_SPARK:
            return self._scala_command(input_variable_name, input_variable_value, output_variable_name)
        elif kind == constants.SESSION_KIND_SPARKR:
            return self._r_command(input_variable_name, input_variable_value, output_variable_name)
        else:
            raise BadUserDataException(u"Kind '{}' is not supported.".format(kind))
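# Dispatch sketch (the session-kind constant is a real sparkmagic name; the
# variable names are illustrative):
#
#   cmd = send_command.to_command(constants.SESSION_KIND_SPARK,
#                                 u'local_x', u'42', u'remote_x')
#   # -> the Command built by self._scala_command(u'local_x', u'42', u'remote_x')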
def _scala_command(self, input_variable_name, pandas_df, output_variable_name):
    # Pandas-DataFrame flavor of the Scala hook: ship the frame as a JSON
    # literal and rebuild it on the cluster via spark.read.json.
    self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df)
    pandas_json = self._get_dataframe_as_json(pandas_df)
    scala_code = u'''
    val rdd_json_array = spark.sparkContext.makeRDD("""{}""" :: Nil)
    val {} = spark.read.json(rdd_json_array)'''.format(pandas_json, output_variable_name)
    return Command(scala_code)
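# Illustrative result: if _get_dataframe_as_json produced '[{"a":1}]' and
# output_variable_name is u'df_out', the generated Scala is roughly:
#
#   val rdd_json_array = spark.sparkContext.makeRDD("""[{"a":1}]""" :: Nil)
#   val df_out = spark.read.json(rdd_json_array)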
def _pyspark_command(self, spark_context_variable_name, encode_result=True):
    # The signature and opening line were cut off in the snippet; they are
    # reconstructed here to mirror _scala_command above.
    command = u'{}.toJSON()'.format(spark_context_variable_name)
    if self.samplemethod == u'sample':
        command = u'{}.sample(False, {})'.format(command, self.samplefraction)
    if self.maxrows >= 0:
        command = u'{}.take({})'.format(command, self.maxrows)
    else:
        command = u'{}.collect()'.format(command)
    # Unicode support has improved in Python 3, so we don't need to encode.
    if encode_result:
        print_command = u'{}.encode("{}")'.format(constants.LONG_RANDOM_VARIABLE_NAME,
                                                  conf.pyspark_dataframe_encoding())
    else:
        print_command = constants.LONG_RANDOM_VARIABLE_NAME
    command = u'for {} in {}: print({})'.format(constants.LONG_RANDOM_VARIABLE_NAME,
                                                command,
                                                print_command)
    return Command(command)
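# Illustrative result (assuming samplemethod == u'sample', samplefraction == 0.1,
# maxrows == 100, and conf.pyspark_dataframe_encoding() returning u'utf-8'):
#
#   for VAR in df.toJSON().sample(False, 0.1).take(100): print(VAR.encode("utf-8"))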
def _r_command(self, input_variable_name, pandas_df, output_variable_name):
    # Pandas-DataFrame flavor for SparkR: round-trip the JSON through a temp
    # file, then persist the resulting SparkDataFrame before removing the file.
    # SparkR's API is persist(df, level); the original `{}.persist()` is not
    # valid R syntax.
    self._assert_input_is_pandas_dataframe(input_variable_name, pandas_df)
    pandas_json = self._get_dataframe_as_json(pandas_df)
    r_code = u'''
    fileConn <- file("temporary_pandas_df_sparkmagics.txt")
    writeLines('{}', fileConn)
    close(fileConn)
    {} <- read.json("temporary_pandas_df_sparkmagics.txt")
    persist({}, "MEMORY_AND_DISK")
    file.remove("temporary_pandas_df_sparkmagics.txt")'''.format(pandas_json, output_variable_name, output_variable_name)
    return Command(r_code)
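# Illustrative result for pandas_json '[{"a":1}]' and output_variable_name
# u'df_out' (matching the persist() fix above):
#
#   fileConn <- file("temporary_pandas_df_sparkmagics.txt")
#   writeLines('[{"a":1}]', fileConn)
#   close(fileConn)
#   df_out <- read.json("temporary_pandas_df_sparkmagics.txt")
#   persist(df_out, "MEMORY_AND_DISK")
#   file.remove("temporary_pandas_df_sparkmagics.txt")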
def _r_command(self, input_variable_name, input_variable_value, output_variable_name):
    # String flavor for SparkR: escape backslashes and double quotes so the
    # value survives embedding in an R string literal, then assign() it.
    self._assert_input_is_string_type(input_variable_name, input_variable_value)
    escaped_input_variable_value = input_variable_value.replace(u'\\', u'\\\\').replace(u'"', u'\\"')
    r_code = u'''assign("{}","{}")'''.format(output_variable_name, escaped_input_variable_value)
    return Command(r_code)
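# Escaping example: for input_variable_value u'say "hi" \\o/' the escaped value
# is u'say \\"hi\\" \\\\o/', so the generated R round-trips the original string:
#
#   assign("out","say \"hi\" \\o/")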
# This method (from the magics base class) relies on names imported elsewhere
# in sparkmagic: conf (sparkmagic.utils.configuration), SparkStatementException,
# string_types (six), and MIMETYPE_TEXT_HTML.
def execute_spark(self, cell, output_var, samplemethod, maxrows, samplefraction, session_name, coerce):
    # Run the cell through the controller; surface errors according to the
    # configured policy, otherwise render the output by MIME type.
    (success, out, mimetype) = self.spark_controller.run_command(Command(cell), session_name)
    if not success:
        if conf.spark_statement_errors_are_fatal():
            if conf.shutdown_session_on_spark_statement_errors():
                self.spark_controller.cleanup()
            raise SparkStatementException(out)
        self.ipython_display.send_error(out)
    else:
        if isinstance(out, string_types):
            if mimetype == MIMETYPE_TEXT_HTML:
                self.ipython_display.html(out)
            else:
                self.ipython_display.write(out)
        else:
            self.ipython_display.display(out)
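# Usage sketch (all argument values hypothetical): running a cell with no
# output variable and default sampling settings:
#
#   self.execute_spark(u'df.count()', None, u'take', 100, None, u'session1', False)
#
# A failing statement raises SparkStatementException only when
# conf.spark_statement_errors_are_fatal() is enabled; otherwise the error text
# goes to ipython_display.send_error.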
def _scala_command(self, input_variable_name, input_variable_value, output_variable_name):
    # String flavor for Scala: a triple-quoted literal avoids most escaping,
    # though it assumes the value itself contains no `"""` sequence.
    self._assert_input_is_string_type(input_variable_name, input_variable_value)
    scala_code = u'var {} = """{}"""'.format(output_variable_name, input_variable_value)
    return Command(scala_code)
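# A minimal standalone check (no Livy needed) of the statement the method above
# formats; `greeting` and the value are hypothetical:
if __name__ == '__main__':
    scala_code = u'var {} = """{}"""'.format(u'greeting', u'hello from the driver')
    print(scala_code)  # -> var greeting = """hello from the driver"""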