How to use the h2o.dump_json function in h2o

To help you get started, we've selected a few examples drawn from public projects, based on popular ways h2o.dump_json is used.

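Before diving into the examples, it helps to know what h2o.dump_json does: in the h2o-2 Python test harness it is a small pretty-printing helper around json.dumps. The sketch below shows the idea; the exact keyword arguments (sorted keys, two-space indent) are an assumption based on py/h2o.py rather than a verified signature.

import json

def dump_json(j):
    # pretty-print a parsed JSON response; h2o-2's helper in py/h2o.py
    # does essentially this (the sort_keys/indent values are assumed)
    return json.dumps(j, sort_keys=True, indent=2)

# any dict-like REST response can be printed this way:
response = {'job_key': 'job_42', 'status': 'DONE', 'numRows': 150}
print(dump_json(response))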

github h2oai / h2o-2 / py / testdir_multi_jvm / test_GBM_cancel_model_reuse.py
for j in range(5):
    # FIX! apparently we can't reuse a model key after a cancel
    kwargs['destination_key'] = 'GBMBad' + str(j)
    # rjson error in poll_url: Job was cancelled by user!
    GBMFirstResult = h2o_cmd.runGBM(parseResult=parseResult, noPoll=True, **kwargs)
    jobids.append(GBMFirstResult['job_key'])
    h2o.check_sandbox_for_errors()

    # try ray's 'models' request to see if anything blows up
    modelsParams = {
        'key': None,
        'find_compatible_frames': 0,
        'score_frame': None
    }
    modelsResult = h2o.nodes[0].models(timeoutSecs=10, **modelsParams)
    print "modelsResult:", h2o.dump_json(modelsResult)

# have to pass the job id
# for j in jobids:
#     h2o.nodes[0].jobs_cancel(key=j)

h2o_jobs.cancelAllJobs()
# PUB-361: wait after the cancel before reusing keys
time.sleep(3)
# am I getting a subsequent parse job cancelled?
h2o_jobs.showAllJobs()

if DELETE_KEYS:
    h2i.delete_keys_from_import_result(pattern=csvFilename, importResult=importResult)
github h2oai / h2o-2 / py / testdir_single_jvm / test_exec2_runif.py
rSummary = h2o_cmd.runSummary(key='s0.hex', cols='0')
# h2o_cmd.infoFromSummary(rSummary)

sSummary = h2o_cmd.runSummary(key='s1.hex', cols='0')
# h2o_cmd.infoFromSummary(sSummary)

sSummary = h2o_cmd.runSummary(key='s2.hex', cols='0')
# h2o_cmd.infoFromSummary(sSummary)

# since there are no NAs in covtype, r.hex and s.hex should be identical?
if 1 == 0:  # disabled comparison
    print "Comparing summary of r.hex to summary of s.hex"
    df = h2o_util.JsonDiff(rSummary, sSummary, with_values=True)
    # the timestamps can differ, so allow one difference
    print "df.difference:", h2o.dump_json(df.difference)
    self.assertLess(len(df.difference), 2)

    print "results from the individual exec expressions (ignore the last, which was an apply)"
    print "results:", results
    self.assertEqual(results, [0.0, 0.0, 0.0, 1859.0, 581012.0, 581012.0, 2959.365300544567, 1859.0, 1859.0])
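For context on the snippet above: the comparison boils down to diffing two parsed JSON summaries and counting the differing key paths. Here is a minimal standalone sketch of that idea; h2o_util.JsonDiff's real implementation may differ in details (for example, how with_values=True reports the values).

import json

def json_diff_paths(a, b, prefix=''):
    # recursively collect the key paths where two parsed-JSON dicts differ
    diffs = []
    for k in set(a) | set(b):
        path = prefix + '/' + str(k)
        if k not in a or k not in b:
            diffs.append(path)
        elif isinstance(a[k], dict) and isinstance(b[k], dict):
            diffs.extend(json_diff_paths(a[k], b[k], path))
        elif a[k] != b[k]:
            diffs.append(path)
    return diffs

# e.g. two summaries that differ only in a timestamp field:
print(json.dumps(json_diff_paths({'mean': 1.0, 'time': 5}, {'mean': 1.0, 'time': 9}), indent=2))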
github h2oai / h2o-2 / py / testdir_single_jvm / test_tree_view.py
def test_tree_view(self):
    parseResult = h2i.import_parse(bucket='smalldata', path='poker/poker1000', hex_key='poker1000.hex', schema='put')
    h2o_cmd.runRF(parseResult=parseResult, trees=50, model_key="model0", timeoutSecs=10)

    for n in range(1):
        a = h2o_cmd.runRFTreeView(n=n, data_key='poker1000.hex', model_key="model0", timeoutSecs=10)
        print(h2o.dump_json(a))
github h2oai / h2o-2 / py / testdir_single_jvm / test_rf_predict_fvec.py
hex_key = 'iris2.csv.hex'
parseResult = h2i.import_parse(bucket='smalldata', path='iris/iris2.csv', schema='put', hex_key=hex_key)
h2o_cmd.runRF(parseResult=parseResult, ntrees=trees, destination_key="iris_rf_model", timeoutSecs=timeoutSecs)

print "Use H2O GeneratePredictionsPage with an H2O-generated model and the same data key. Inspect/Summary result"

start = time.time()
predict = h2o.nodes[0].generate_predictions(model_key="iris_rf_model", data_key=hex_key,
    prediction='predict.hex')
print "generate_predictions end on ", hex_key, " took", time.time() - start, 'seconds'
print "predict:", h2o.dump_json(predict)
csvPredictPathname = SYNDATASETS_DIR + "/" + "iris2.predict.csv"
h2o.nodes[0].csv_download(src_key='predict.hex', csvPathname=csvPredictPathname)

inspect = h2o_cmd.runInspect(key='predict.hex')
print "inspect:", h2o.dump_json(inspect)

# print h2o.dump_json(predict)
# no min/max any more with enums?

expectedCols = {
    # "max": 2.0,
    # "mean": 1.0,
    # "min": 0.0,
    "naCnt": 0,
    # "name": 0,
    # Enum or real?
    # "type": "Real",
}

predictCols = inspect['cols'][0]
diffKeys = [k for k in expectedCols if predictCols[k] != expectedCols[k]]
github h2oai / h2o-2 / py / testdir_single_jvm / notest_exec2_fast_locks_overlap.py
def test_exec2_fast_locks_overlap(self):
    csvPathname = 'iris/iris2.csv'
    src_key = 'iris.csv'
    if not AVOID_BUG:
        # need the key name (pattern) to feed to parse
        (importResult, importPattern) = h2i.import_only(bucket='smalldata', path=csvPathname, schema='put',
            src_key=src_key, timeoutSecs=10)
        # just as a reminder of what these return values look like
        print "importResult:", h2o.dump_json(importResult)
        print "importPattern:", h2o.dump_json(importPattern)
    y = 4

    lastHexKey = None
    for trial in range(1, 100):
        if AVOID_BUG:
            # need the key name (pattern) to feed to parse
            (importResult, importPattern) = h2i.import_only(bucket='smalldata', path=csvPathname, schema='put',
                src_key=src_key, timeoutSecs=10)
            # just as a reminder of what these return values look like
            print "importResult:", h2o.dump_json(importResult)
            print "importPattern:", h2o.dump_json(importPattern)

        # make sure each parse gets a unique dest key (not in use)
        hex_key = "iris2_" + str(trial) + ".hex"
        # what if we kicked off another parse without waiting for it? I think
        # the src key gets locked, so we'd get lock issues on the src_key
github h2oai / h2o-2 / py / testdir_multi_jvm / test_iostatus.py
def test_iostatus(self):
    # wait a bit first?
    time.sleep(5)
    # Ask each node for iostatus statistics
    for node in h2o.nodes:
        stats = node.iostatus()
        h2o.verboseprint(h2o.dump_json(stats))
        histogram = stats['histogram']
        # each histogram entry looks like:
        # {
        #     u'i_o': u'TCP',
        #     u'peak_bytes_/_sec': 199690496.78920883,
        #     u'effective_bytes_/_sec': 21850666.666666668,
        #     u'r_w': u'write',
        #     u'cloud_node_idx': 2,
        #     u'window': 10
        # }
        print "\nProbing node:", str(node.h2o_addr) + ":" + str(node.port)
        for k in histogram:
            ### print k
            if k['window'] == 10:
                i_o = k['i_o']
                node = k['cloud_node_idx']  # note: rebinds the outer loop variable 'node'
                r_w = k['r_w']
github h2oai / h2o-2 / py / testdir_single_jvm / test_rf_model_key_unique_fvec.py
else:
    csvPathname = 'iris/iris2.csv'
start = time.time()
parseResult = h2i.import_parse(bucket='smalldata', path=csvPathname, schema='put')
rfResult = h2o_cmd.runRF(parseResult=parseResult, trees=6, timeoutSecs=10, rfView=False)
print "RF #%d" % trial, "started on ", csvPathname, 'took', time.time() - start, 'seconds'
print "rfResult", h2o.dump_json(rfResult)
model_key = rfResult['destination_key']
print "model_key:", model_key
if model_key in modelKeyDict:
    raise Exception("model_key in RF #%d matches the one from prior RF #%d" % (trial, modelKeyDict[model_key]))
modelKeyDict[model_key] = trial

# just show the jobs still going, if any. maybe none, since iris is small and finishes fast
a = h2o.nodes[0].jobs_admin()
print "jobs_admin():", h2o.dump_json(a)
github h2oai / h2o-2 / py / testdir_release / c6 / test_c6_maprfs_fvec.py
    csvFilenameList = random.sample(csvFilenameAll, 8)
# Alternatively: do the list in order! Note the order is easy to hard
else:
    csvFilenameList = csvFilenameAll

# save the first, for all comparisons, to avoid slow drift with each iteration
importFolderPath = "datasets"
trial = 0
for csvFilename in csvFilenameList:
    # creates csvFilename.hex from the file in the hdfs dir
    csvPathname = importFolderPath + "/" + csvFilename

    timeoutSecs = 1000
    # do an import first, because we want to get the size of the file
    (importResult, importPattern) = h2i.import_only(path=csvPathname, schema="maprfs", timeoutSecs=timeoutSecs)
    print "importResult:", h2o.dump_json(importResult)
    succeeded = importResult['files']
    fails = importResult['fails']

    if len(succeeded) < 1:
        raise Exception("Should have imported at least 1 key for %s" % csvPathname)

    # just do a search
    foundIt = None
    for f in succeeded:
        if csvPathname in f:
            foundIt = f
            break

    if not foundIt:
        raise Exception("Should have found %s in the imported keys for %s" % (importPattern, csvPathname))
github h2oai / h2o-2 / py / testdir_kevin / test_parse_specific_case1.py
def test_parse_specific_case1(self):
    SYNDATASETS_DIR = h2o.make_syn_dir()
    hex_key = "a.hex"

    for (dataset, expNumRows, expNumCols, expNaCnt, expType) in tryList:
        csvFilename = 'specific_' + str(expNumRows) + "x" + str(expNumCols) + '.csv'
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename
        write_syn_dataset(csvPathname, dataset)

        parseResult = h2i.import_parse(path=csvPathname, schema='put', header=0,
            hex_key=hex_key, timeoutSecs=10, doSummary=False)
        inspect = h2o_cmd.runInspect(None, parseResult['destination_key'], timeoutSecs=60)

        print "inspect:", h2o.dump_json(inspect)
        numRows = inspect['numRows']
        self.assertEqual(numRows, expNumRows, msg='Wrong numRows: %s Expected: %s' % (numRows, expNumRows))
        numCols = inspect['numCols']
        self.assertEqual(numCols, expNumCols, msg='Wrong numCols: %s Expected: %s' % (numCols, expNumCols))

        # this is required for the test setup
        assert len(expNaCnt) >= expNumCols
        assert len(expType) >= expNumCols

        for k in range(expNumCols):
            naCnt = inspect['cols'][k]['naCnt']
            self.assertEqual(expNaCnt[k], naCnt, msg='col %s naCnt %d should be %s' % (k, naCnt, expNaCnt[k]))
            stype = inspect['cols'][k]['type']
            self.assertEqual(expType[k], stype, msg='col %s type %s should be %s' % (k, stype, expType[k]))
github h2oai / h2o-2 / py / testdir_single_jvm / test_bayes_rand2.py
    'response': response,
}

colX = h2o_util.pickRandParams(paramDict, params)
kwargs = params.copy()

timeoutSecs = 120
# change the response to a factor: turn the 7-class problem into a
# binomial one so that AUC can be computed below
execExpr = 'covtype.hex[,54+1] = factor(covtype.hex[,54+1] != 5)'
resultExec, ncols = h2e.exec_expr(execExpr=execExpr)

start = time.time()
bayesResult = h2o.nodes[0].naive_bayes(timeoutSecs=timeoutSecs, source='covtype.hex', **kwargs)
print "bayes end on ", csvPathname, 'took', time.time() - start, 'seconds'

print "bayes result:", h2o.dump_json(bayesResult)

nb_model = bayesResult['nb_model']
ncats = nb_model['ncats']
nnums = nb_model['nnums']
pcond = nb_model['pcond']
pprior = nb_model['pprior']
rescnt = nb_model['rescnt']
modelClassDist = nb_model['_modelClassDist']
names = nb_model['_names']
domains = nb_model['_domains']
priorClassDist = nb_model['_priorClassDist']
model_key = nb_model['_key']

# is it an error to get a std dev of 0 after predicting?
print "Doing predict with same dataset, and the bayes model"