# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def calculate_xgboost_classifier_output_shapes(operator):
    """Infer output shapes for an XGBoost classifier operator.

    Output 0 is the predicted label tensor of shape [N]; output 1 is the
    class-score tensor of shape [N, ncl], where ncl is derived from the
    booster's objective and tree count.
    """
    check_input_and_output_numbers(operator, input_count_range=1, output_count_range=2)
    check_input_and_output_types(operator, good_input_types=[FloatTensorType, Int64TensorType])
    N = operator.inputs[0].type.shape[0]

    xgb_node = operator.raw_operator
    params = get_xgb_params(xgb_node)
    booster = xgb_node.get_booster()
    # The JSON dump contains one entry per boosted tree, i.e. one per
    # (class, estimator) pair for multi-class objectives.
    ntrees = len(booster.get_dump(with_stats=True, dump_format='json'))
    objective = params["objective"]
    if objective == "binary:logistic":
        ncl = 2
    else:
        ncl = ntrees // params['n_estimators']
        # reg:logistic with a single tree group per estimator is still a
        # two-class problem.
        if objective == "reg:logistic" and ncl == 1:
            ncl = 2
    # The checks above demand exactly two outputs, so both must be typed:
    # labels first, then per-class scores.
    operator.outputs[0].type = Int64TensorType(shape=[N])
    operator.outputs[1].type = FloatTensorType([N, ncl])
def calculate_sparkml_polynomial_expansion_output_shapes(operator):
    """Shape the PolynomialExpansion output as [N, C'], where C' is the
    number of term combinations produced for the configured degree."""
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[
        FloatTensorType, Int64TensorType])
    batch_size = operator.inputs[0].type.shape[0]
    expanded_cols = get_combinations_count(
        operator.inputs[0].type.shape[1],
        operator.raw_operator.getDegree())
    # Keep the input's tensor type but swap in the expanded shape.
    output_type = copy.deepcopy(operator.inputs[0].type)
    output_type.shape = [batch_size, expanded_cols]
    operator.outputs[0].type = output_type
def calculate_sklearn_scaler_output_shapes(operator):
    '''
    Allowed input/output patterns are
        1. [N, C_1], ..., [N, C_n] ---> [N, C_1 + ... + C_n]

    Similar to imputer, this operator can take multiple input feature
    tensors and concatenate them along C-axis.
    '''
    check_input_and_output_numbers(operator, input_count_range=[1, None], output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[FloatTensorType, Int64TensorType],
                                 good_output_types=[FloatTensorType])
    # Inputs: multiple float- and integer-tensors
    # Output: one float tensor
    for variable in operator.inputs:
        if len(variable.type.shape) != 2:
            raise RuntimeError('Only 2-D tensor(s) can be input(s)')
    if len(set(variable.type.shape[0] for variable in operator.inputs)) > 1:
        raise RuntimeError('Batch size must be identical across inputs')

    N = operator.inputs[0].type.shape[0]
    C = 0
    for variable in operator.inputs:
        if isinstance(variable.type.shape[1], numbers.Integral):
            C += variable.type.shape[1]
        else:
            # A non-integral (symbolic/unknown) column count makes the
            # concatenated width unknown as well.
            # NOTE(review): the original else-branch was truncated in this
            # file; None is used here as the symbolic dimension — confirm
            # against the converter stack's convention for unknown dims.
            C = None
            break
    operator.outputs[0].type = FloatTensorType([N, C])
def calculate_sparkml_string_indexer_output_shapes(operator):
    '''
    StringIndexer only re-codes input values as int64 indices, so the
    output keeps the input's shape unchanged.
    '''
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[Int64TensorType, StringTensorType])
    # Deep-copy the shape so the output type never aliases the input's.
    operator.outputs[0].type = Int64TensorType(
        copy.deepcopy(operator.inputs[0].type.shape))
def calculate_sparkml_naive_bayes_output_shapes(operator):
    """NaiveBayes emits two float tensors: a label column [N, 1] and a
    per-class score matrix [N, numClasses]."""
    check_input_and_output_numbers(operator, output_count_range=2)
    check_input_and_output_types(operator,
                                 good_input_types=[FloatTensorType],
                                 good_output_types=[FloatTensorType, FloatTensorType])
    batch = operator.inputs[0].type.shape[0]
    num_classes = operator.raw_operator.numClasses
    operator.outputs[0].type = FloatTensorType([batch, 1])
    operator.outputs[1].type = FloatTensorType([batch, num_classes])
def calculate_word2vec_output_shapes(operator):
    """Word2Vec produces a [1, vectorSize] float tensor; only a batch
    size of exactly 1 is supported by the converter."""
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[StringTensorType])
    batch = operator.inputs[0].type.shape[0]
    if batch != 1:
        raise SparkMlConversionError('Word2Vec converter cannot handle batch size of more than 1')
    vector_size = operator.raw_operator.getOrDefault('vectorSize')
    operator.outputs[0].type = FloatTensorType([batch, vector_size])
def calculate_gbt_classifier_output_shapes(operator):
    """GBT classifier: label output [N]; when the raw operator is a
    GBTClassificationModel, a second [N, 2] probability output is typed
    as well (Spark GBT classification is binary-only)."""
    check_input_and_output_numbers(operator, input_count_range=1, output_count_range=[1, 2])
    check_input_and_output_types(operator, good_input_types=[FloatTensorType, Int64TensorType])
    input_shape = operator.inputs[0].type.shape
    if len(input_shape) != 2:
        raise RuntimeError('Input must be a [N, C]-tensor')
    batch = input_shape[0]
    operator.outputs[0].type = Int64TensorType(shape=[batch])
    if isinstance(operator.raw_operator, GBTClassificationModel):
        operator.outputs[1].type = FloatTensorType([batch, 2])
def calculate_sparkml_scaler_output_shapes(operator):
    """Scalers keep the input shape; the output is always a float tensor."""
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[FloatTensorType, Int64TensorType])
    # Copy the shape so later mutations of the output type leave the
    # input type untouched.
    shape_copy = copy.deepcopy(operator.inputs[0].type.shape)
    operator.outputs[0].type = FloatTensorType(shape_copy)
def calculate_sparkml_pca_output_shapes(operator):
    """PCA projects each [N, C] input onto [N, k] principal components,
    where k comes from the model's 'k' parameter."""
    check_input_and_output_numbers(operator, output_count_range=1)
    check_input_and_output_types(operator, good_input_types=[FloatTensorType])
    num_components = operator.raw_operator.getOrDefault('k')
    batch = operator.inputs[0].type.shape[0]
    operator.outputs[0].type = FloatTensorType([batch, num_components])
def calculate_decision_tree_regressor_output_shapes(operator):
    """A regressor yields a single float prediction column: [N, 1]."""
    check_input_and_output_numbers(operator, input_count_range=1, output_count_range=1)
    batch = operator.inputs[0].type.shape[0]
    operator.outputs[0].type = FloatTensorType(shape=[batch, 1])