How to use the bigml.io.UnicodeWriter class in bigml

To help you get started, we’ve selected a few bigml.io.UnicodeWriter examples based on popular ways it is used in public projects.

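Before the project snippets, here is a minimal sketch of the two usage patterns they share: UnicodeWriter as a context manager, and the explicit open_writer()/close_writer() pair. The file names and rows below are illustrative, not taken from any of the projects.

from bigml.io import UnicodeWriter

# Pattern 1: context manager; the file is opened and closed automatically.
with UnicodeWriter("predictions.csv", lineterminator="\n") as writer:
    writer.writerow(["field", "prediction"])
    writer.writerow(["000001", "Iris-setosa"])

# Pattern 2: explicit open/close; useful when the writer outlives one block.
writer = UnicodeWriter("scores.csv").open_writer()
writer.writerow(["iteration", "score"])
writer.close_writer()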

github bigmlcom/bigmler: bigmler/analyze/k_fold_cv.py
def best_first_search(datasets_file, api, args, command_obj,
                      staleness=None, penalty=None, objective_name=None,
                      resume=False):
    """Selecting the fields to be used in the model construction

    """
    counter = 0
    loop_counter = 0
    features_file = os.path.normpath(os.path.join(args.output_dir,
                                                  FEATURES_LOG))
    features_writer = UnicodeWriter(features_file).open_writer()
    features_header = FEATURES_HEADER
    if staleness is None:
        staleness = DEFAULT_STALENESS
    if penalty is None:
        penalty = DEFAULT_PENALTY
    # retrieving the first dataset in the file
    try:
        with open(datasets_file, u.open_mode("r")) as datasets_handler:
            dataset_id = datasets_handler.readline().strip()
    except IOError as exc:
        sys.exit("Could not read the generated datasets file: %s" %
                 str(exc))
    try:
        stored_dataset = u.storage_file_name(args.output_dir, dataset_id)
        with open(stored_dataset, u.open_mode("r")) as dataset_handler:
            dataset = json.loads(dataset_handler.read())
    except IOError as exc:  # clause restored to close the try; the snippet is cut off here
        sys.exit("Could not read the stored dataset file: %s" % str(exc))
github bigmlcom/bigmler: bigmler/forecast.py
"""Writes the final forecast to the required output

    The function creates a new file per field used in the forecast input data.
    The id of the field will be appended to the name provided in the `output`
    parameter.
    """

    for objective_id, forecast_value in forecast.items():
        headers = [f["model"] for f in forecast_value]
        points = []
        if not forecast_value:
            sys.exit("No forecasts available")
        for index in range(len(forecast_value[0]["point_forecast"])):
            points.append([f["point_forecast"][index] for f in forecast_value])
        output_file = "%s_%s.csv" % (output, objective_id)
        with UnicodeWriter(output_file, lineterminator="\n") as out_handler:
            out_handler.writerow(headers)
            for row in points:
                out_handler.writerow(row)
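The output_file line above implements the naming rule described in the docstring; a quick worked example (the field id is hypothetical):

output = "my_forecast"   # value of the `output` parameter
objective_id = "000005"  # hypothetical objective field id
print("%s_%s.csv" % (output, objective_id))  # -> my_forecast_000005.csv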
github bigmlcom/bigmler: bigmler/analyze/k_fold_cv.py
def best_candidates_number(datasets_file, args, command_obj,
                           penalty=None,
                           resume=False):
    """Selecting the best number of random candidates
       to be used in the ensemble construction

    """
    loop_counter = 0
    candidates_file = os.path.normpath(os.path.join(args.output_dir,
                                                    CANDIDATES_LOG))
    candidates_writer = UnicodeWriter(candidates_file).open_writer()
    candidates_writer.writerow(CANDIDATES_HEADER)
    args.output_dir = os.path.normpath(os.path.join(args.output_dir,
                                                    "random"))
    max_candidates = args.max_candidates + 1

    if args.nodes_step is None:
        args.nodes_step = DEFAULT_CANDIDATES_STEP
    random_candidates = args.min_candidates

    if penalty is None:
        penalty = DEFAULT_CANDIDATES_PENALTY
    best_score = -float('inf')
    metric = args.optimize
    score = best_score
    best_counter = 0
    while random_candidates < max_candidates:
        ...  # snippet cut off: the loop body evaluating each candidate count is omitted
github bigmlcom/bigmler: bigmler/prediction.py
    # excerpt: the enclosing function definition and earlier lines are cut off
    prediction_file = UnicodeWriter(prediction_file).open_writer()
    for model in models:
        model = bigml.api.get_model_id(model)
        predictions_file = get_predictions_file_name(model,
                                                     output_path)
        predictions_files.append(predictions_file)
        if (not resume or
                not c.checkpoint(c.are_predictions_created, predictions_file,
                                 test_reader.number_of_tests(),
                                 debug=args.debug)[0]):
            if not message_logged:
                message = u.dated("Creating remote predictions.\n")
                u.log_message(message, log_file=session_file,
                              console=args.verbosity)
            message_logged = True
            with UnicodeWriter(predictions_file) as predictions_file:
                for input_data in raw_input_data_list:
                    input_data_dict = test_reader.dict(input_data)
                    prediction = api.create_prediction(model, input_data_dict,
                                                       args=prediction_args)
                    u.check_resource_error(prediction,
                                           "Failed to create prediction: ")
                    u.log_message("%s\n" % prediction['resource'],
                                  log_file=log)
                    prediction_row = prediction_to_row(prediction)
                    predictions_file.writerow(prediction_row)
                    if single_model:
                        write_prediction(prediction_row[0:2], prediction_file,
                                         args.prediction_info,
                                         input_data, exclude)
    if single_model:
        prediction_file.close_writer()
github bigmlcom/bigmler: bigmler/topicdistribution.py
def topic_distribution(topic_models, fields, args, session_file=None):
    """Computes a topic distribution for each entry in the `test_set`.

    """
    test_set = args.test_set
    test_set_header = args.test_header
    output = args.predictions
    test_reader = TestReader(test_set, test_set_header, fields,
                             None,
                             test_separator=args.test_separator)
    with UnicodeWriter(output, lineterminator="\n") as output:
        # columns to exclude if input_data is added to the prediction field
        exclude, headers = use_prediction_headers(
            test_reader, fields, args)

        # Local topic distributions: computed locally using the topic
        # models' method
        message = u.dated("Creating local topic distributions.\n")
        u.log_message(message, log_file=session_file, console=args.verbosity)
        local_topic_distribution(topic_models, test_reader, output,
                                 args, exclude=exclude, headers=headers)
    test_reader.close()
github bigmlcom/bigmler: bigmler/logrprediction.py
def prediction(models, fields, args, session_file=None):
    """Computes a supervised model prediction
    for each entry in the `test_set`.

    """
    test_set = args.test_set
    test_set_header = args.test_header
    output = args.predictions
    test_reader = TestReader(test_set, test_set_header, fields,
                             None,
                             test_separator=args.test_separator)
    with UnicodeWriter(output, lineterminator="\n") as output:
        # columns to exclude if input_data is added to the prediction field
        exclude = use_prediction_headers(
            args.prediction_header, output, test_reader, fields, args,
            args.objective_field, quality="probability")

        # Local predictions: Predictions are computed locally
        message = u.dated("Creating local predictions.\n")
        u.log_message(message, log_file=session_file, console=args.verbosity)
        local_prediction(models, test_reader,
                         output, args, exclude=exclude)
    test_reader.close()
github bigmlcom/bigmler: bigmler/prediction.py
def combine_votes(votes_files, to_prediction, to_file, method=0,
                  prediction_info=NORMAL_FORMAT, input_data_list=None,
                  exclude=None):
    """Combines the votes found in the votes' files and stores predictions.

       votes_files: should contain the list of file names
       to_prediction: is the Model method that casts prediction to numeric
                      type if needed
       to_file: is the name of the final output file.
    """
    votes = read_votes(votes_files, to_prediction)

    u.check_dir(to_file)
    with UnicodeWriter(to_file) as output:
        number_of_tests = len(votes)
        if input_data_list is None or len(input_data_list) != number_of_tests:
            input_data_list = None
        for index in range(0, number_of_tests):
            multivote = votes[index]
            input_data = (None if input_data_list is None
                          else input_data_list[index])
            write_prediction(multivote.combine(method, full=True), output,
                             prediction_info, input_data, exclude)
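A hedged sketch of how combine_votes might be invoked, following its docstring: the votes file names and the model id are made up, and it assumes the bigml Model class exposes the to_prediction casting method the docstring refers to.

from bigml.model import Model

local_model = Model("model/5143a51a37203f2cf7000956")  # made-up model id
votes_files = ["votes0_predictions.csv",               # made-up file names
               "votes1_predictions.csv"]
combine_votes(votes_files, local_model.to_prediction,
              "combined_predictions.csv", method=0)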
github bigmlcom/bigmler: bigmler/prediction.py
"""
    prediction_args = {
        "tags": args.tag,
        "combiner": args.method
    }
    if output_path is None:
        output_path = u.check_dir(prediction_file)

    if (not resume or not c.checkpoint(
            c.are_predictions_created, prediction_file,
            test_reader.number_of_tests(), debug=args.debug)[0]):
        message = u.dated("Creating remote predictions.")
        u.log_message(message, log_file=session_file,
                      console=args.verbosity)

        with UnicodeWriter(prediction_file) as predictions_file:
            for input_data in test_reader:
                input_data_dict = test_reader.dict(input_data)
                prediction = api.create_prediction(ensemble_id,
                                                   input_data_dict,
                                                   wait_time=0,
                                                   args=prediction_args)
                prediction = u.check_resource(prediction,
                                              api.get_prediction)
                u.check_resource_error(prediction,
                                       "Failed to create prediction: ")
                u.log_message("%s\n" % prediction['resource'], log_file=log)
                prediction_row = prediction_to_row(prediction,
                                                   args.prediction_info)
                write_prediction(prediction_row, predictions_file,
                                 args.prediction_info, input_data, exclude)
github bigmlcom/python: bigml/model.py
headers_names.append("error")
            for index in range(0, self._max_bins):
                headers_names.append("bin%s_value" % index)
                headers_names.append("bin%s_instances" % index)
        else:
            headers_names.append(
                self.fields[self.tree.objective_id]['name'])
            headers_names.append("confidence")
            headers_names.append("impurity")
            for category, _ in self.tree.distribution:
                headers_names.append(category)

        nodes_generator = self.get_nodes_info(headers_names,
                                              leaves_only=leaves_only)
        if file_name is not None:
            with UnicodeWriter(file_name) as writer:
                writer.writerow([header.encode("utf-8")
                                 for header in headers_names])
                for row in nodes_generator:
                    writer.writerow([item if not isinstance(item, basestring)
                                     else item.encode("utf-8")
                                     for item in row])
        else:
            rows = []
            rows.append(headers_names)
            for row in nodes_generator:
                rows.append(row)
            return rows
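The snippet above is the tail of the model's node-export helper. A hedged usage sketch, assuming the enclosing method is Model.tree_csv from the bigml bindings and using a made-up model id:

from bigml.model import Model

local_model = Model("model/5143a51a37203f2cf7000956")  # made-up model id
# Export one CSV row per leaf node, including confidence and the
# per-category distribution columns built above.
local_model.tree_csv("model_nodes.csv", leaves_only=True)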
github bigmlcom/python: bigml/fields.py
def summary_csv(self, filename=None):
        """Summary of the contents of the fields

        """

        summary = []
        writer = None
        if filename is not None:
            writer = UnicodeWriter(filename,
                                   quoting=csv.QUOTE_NONNUMERIC).open_writer()
            writer.writerow(SUMMARY_HEADERS)
        else:
            summary.append(SUMMARY_HEADERS)

        for field_column in self.fields_columns:
            field_id = self.field_id(field_column)
            field = self.fields.get(field_id)
            field_summary = []
            field_summary.append(field.get('column_number'))
            field_summary.append(field_id)
            field_summary.append(field.get('name'))
            field_summary.append(field.get('label'))
            field_summary.append(field.get('description'))
            field_summary.append(field.get('optype'))
            field_summary_value = field.get('summary', {})
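This last snippet is cut off mid-loop: each field contributes one row with its column number, id, name, label, description, optype and summary values. A hedged usage sketch, assuming BigML credentials are configured in the environment (the dataset id is made up):

from bigml.api import BigML
from bigml.fields import Fields

api = BigML()  # picks up BIGML_USERNAME / BIGML_API_KEY from the environment
dataset = api.get_dataset("dataset/5143a51a37203f2cf7000972")  # made-up id
fields = Fields(dataset)
# With a filename, the summary is written to CSV through UnicodeWriter;
# with filename=None it is returned as a list of rows instead.
fields.summary_csv("fields_summary.csv")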