How to use the onnxruntime.python.tools.quantization.quantize.QuantizedValue function in onnxruntime

To help you get started, we’ve selected a few QuantizedValue examples, based on popular ways it is used in public projects.
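Judging from the calls in the examples below, QuantizedValue takes the original tensor name, the name of its quantized counterpart, the names of the scale and zero-point tensors, a QuantizedValueType, an optional per-channel axis, and the quantized element type. The sketch below is inferred from that usage rather than from documented API; the import path follows this page's title and may differ in an installed onnxruntime wheel (newer releases expose onnxruntime.quantization instead), and the tensor names are made up for illustration.

from onnxruntime.python.tools.quantization.quantize import QuantizedValue, QuantizedValueType

# Record that "conv0_weight" (a hypothetical tensor name) has been quantized.
# The quantized tensor, its scale, and its zero point live under derived
# names, following the "_quantized"/"_scale"/"_zero_point" convention used
# throughout quantize.py.
quantized_value_map = {}
q_weight = QuantizedValue("conv0_weight",
                          "conv0_weight_quantized",
                          "conv0_weight_scale",
                          "conv0_weight_zero_point",
                          QuantizedValueType.Initializer)
quantized_value_map["conv0_weight"] = q_weight

Every example below follows this pattern: quantize a tensor, then key a QuantizedValue by the original tensor name so that later passes can look up the quantized tensor, scale, and zero point.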


Example 1: microsoft/onnxruntime, onnxruntime/python/tools/quantization/quantize.py
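This snippet builds the input list for a QLinearMatMul node (each quantized input with its scale and zero point, then the output's scale and zero point), creates the node, and records a QuantizedValue for the node output under the original output name.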
        # Input 0
        qlinear_matmul_inputs.append(quantized_input_names[0])
        qlinear_matmul_inputs.append(scale_names[0])
        qlinear_matmul_inputs.append(zero_point_names[0])
        # Input 1
        qlinear_matmul_inputs.append(quantized_input_names[1])
        qlinear_matmul_inputs.append(scale_names[1])
        qlinear_matmul_inputs.append(zero_point_names[1])
        # Output
        qlinear_matmul_inputs.append(output_scale_name)
        qlinear_matmul_inputs.append(output_zp_name)

        qlinear_matmul_node = onnx.helper.make_node("QLinearMatMul", qlinear_matmul_inputs,
            [qlinear_matmul_output], qlinear_matmul_name)
        nodes.append(qlinear_matmul_node)

        # Create an entry for this quantized value
        q_output = QuantizedValue(node.output[0], qlinear_matmul_output, output_scale_name, output_zp_name,
            QuantizedValueType.Input)
        self.quantized_value_map[node.output[0]] = q_output

        return nodes

Example 2: microsoft/onnxruntime, onnxruntime/python/tools/quantization/quantize.py
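This excerpt stitches per-channel quantized weight data back into a single tensor, wraps it in a QuantizedInitializer, and registers the weight in quantized_value_map with QuantizedValueType.Initializer and the derived _quantized, _scale, and _zero_point names.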
        quantized_per_channel_data_list.append(quantized_per_channel_data)
        channel_index = 0  # (M x C/group x kH x kW)
        # combine per_channel_data into one
        reshape_dims = list(initializer.dims)  # deep copy
        reshape_dims[channel_index] = 1  # only one per channel for reshape
        quantized_weights = np.asarray(quantized_per_channel_data_list[0]).reshape(reshape_dims)
        for i in range(1, len(quantized_per_channel_data_list)):
            channel_weights = np.asarray(quantized_per_channel_data_list[i]).reshape(reshape_dims)
            quantized_weights = np.concatenate((quantized_weights, channel_weights), axis=0)

        weight = QuantizedInitializer(initializer.name, initializer, rmin_list, rmax_list, zero_point_list,
                        scale_list, weights, quantized_weights.flatten().tolist(), channel_index, qType)

        # Make entry for this quantized weight
        assert weight.name not in self.quantized_value_map
        quantized_value = QuantizedValue(weight.name, weight.name + "_quantized", weight.name + "_scale",
            weight.name + "_zero_point", QuantizedValueType.Initializer, None, qType)
        self.quantized_value_map[weight.name] = quantized_value

        return weight

Example 3: microsoft/onnxruntime, onnxruntime/python/tools/quantization/quantize.py
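The QLinearConv case follows the same recipe as QLinearMatMul: collect the quantized inputs, scales, and zero points (plus an optional quantized bias), build the node, and record a QuantizedValue for the output.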
        # Input 1
        qlinear_conv_inputs.append(quantized_input_names[1])
        qlinear_conv_inputs.append(scale_names[1])
        qlinear_conv_inputs.append(zero_point_names[1])

        # Output
        qlinear_conv_inputs.append(output_scale_name)
        qlinear_conv_inputs.append(output_zp_name)

        if bias_present:
            qlinear_conv_inputs.append(quantized_bias_name)

        qlinear_conv_node = onnx.helper.make_node("QLinearConv", qlinear_conv_inputs,
            [qlinear_conv_output], qlinear_conv_name, **kwargs)
        nodes.append(qlinear_conv_node)

        # Create an entry for this quantized value
        q_output = QuantizedValue(node.output[0], qlinear_conv_output, output_scale_name, output_zp_name,
            QuantizedValueType.Input)
        self.quantized_value_map[node.output[0]] = q_output

        return nodes

Example 4: microsoft/onnxruntime, onnxruntime/python/tools/quantization/quantize.py
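_get_quantized_weight quantizes a single weight initializer end to end: quantize_data produces the range, zero point, scale, and quantized values, which are wrapped in a QuantizedInitializer, and a QuantizedValue is logged under the weight's name.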
def _get_quantized_weight(self, initializer, qType):
        '''
            :param initializer: TensorProto initializer
            :param qType: type to quantize to
            :return: Weight class with quantization information
        '''
        weights_data = self.find_weight_data(initializer)
        rmin, rmax, zero_point, scale, quantized_weights_data = quantize_data(weights_data.flatten().tolist(),
            _get_qrange_for_qType(qType), qType)
        weight = QuantizedInitializer(initializer.name, initializer, [rmin], [rmax], [zero_point], [scale],
                        weights_data, quantized_weights_data, axis=None, qType=qType)

        # Log entry for this quantized weight
        assert weight.name not in self.quantized_value_map
        quantized_value = QuantizedValue(weight.name, weight.name + "_quantized", weight.name + "_scale",
            weight.name + "_zero_point", QuantizedValueType.Initializer, None, qType)
        self.quantized_value_map[weight.name] = quantized_value

        return weight

Example 5: microsoft/onnxruntime, onnxruntime/python/tools/quantization/quantize.py
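For a bias, the quantized data is stored as an INT32 initializer and added to the graph; the corresponding QuantizedValue passes empty strings for the scale and zero-point names, since the bias scale (bias_scale) is tracked on the QuantizedInitializer instead.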
            # update bias initializer
            bias_np_data = np.asarray(quantized_data, dtype=np.int32).reshape(bias_initializer.dims)
            packed_bias_initializer = onnx.numpy_helper.from_array(bias_np_data, quantized_bias_name)
            self.model.graph.initializer.extend([packed_bias_initializer])

            bias_value_info = onnx.helper.make_tensor_value_info(quantized_bias_name, onnx_proto.TensorProto.INT32, bias_initializer.dims)
            self.model.graph.input.extend([bias_value_info])

            # log entries for this quantized bias value
            quantized_bias_entry = QuantizedInitializer(bias_name, bias_initializer, [0], [0], [0], [bias_scale],
                            bias_data, quantized_data, qType=onnx_proto.TensorProto.INT32)
            self._quantized_weights.append(quantized_bias_entry)
        
            assert bias_name not in self.quantized_value_map
            quantized_value = QuantizedValue(bias_name, quantized_bias_name, "", "",
                QuantizedValueType.Initializer, None, onnx_proto.TensorProto.INT32)
            self.quantized_value_map[bias_name] = quantized_value

        return quantized_bias_name

Example 6: microsoft/onnxruntime, onnxruntime/python/tools/quantization/quantize.py
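_quantize_gather_ops quantizes a Gather node in place: input 0 is replaced by its quantized version, the output is renamed with a _quantized suffix, and a QuantizedValue maps the original output name to the new quantized output.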
def _quantize_gather_ops(self, node, new_nodes_list):
        assert node.op_type == "Gather"
        (quantized_input_names, zero_point_names, scale_names, nodes) = \
            self._quantize_inputs(node, [0], new_nodes_list)
        
        gather_new_output = node.output[0] + "_quantized"

        # Create an entry for this quantized value
        q_output = QuantizedValue(node.output[0], gather_new_output, scale_names[0], zero_point_names[0],
            QuantizedValueType.Input)
        self.quantized_value_map[node.output[0]] = q_output

        gather_original_output = node.output[0]
        node.output[0] = gather_new_output
        node.input[0] = quantized_input_names[0]
        nodes.append(node)

        return nodes