How to use the h2o.nodes list in h2o

To help you get started, we’ve selected a few h2o examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / h2o-2 / py / testdir_single_jvm / test_R_KMeans_basic.py View on Github external
def test_R_C_kmeans_prostate(self):
        print "\nStarting prostate.csv"
        rScript = h2o.find_file('R/tests/test_R_C_kmeans_prostate.R')
        rLibrary = h2o.find_file('R/h2o-package/R/H2O.R')

        # Run k-means with k = 5 on column 2 (Age)
        # Loop to see if we get same centers
        shCmdString = "R -f " + rScript + " --args " + rLibrary + " " + h2o.nodes[0].http_addr + ":" + str(h2o.nodes[0].port)

        (ps, outpath, errpath) =  h2o.spawn_cmd('rtest_with_h2o', shCmdString.split())
        h2o.spawn_wait(ps, outpath, errpath, timeout=10)
github h2oai / h2o-2 / py / testdir_single_jvm / test_exec2_sum.py View on Github external
csvPathname = '1B/reals_100000x1000_15f.data'
            csvPathname = '1B/reals_1B_15f.data'
            csvPathname = '1B/reals_1000000x1000_15f.data'

        hex_key = 'r1'
        parseResult = h2i.import_parse(bucket=bucket, path=csvPathname, schema='local', 
            hex_key=hex_key, timeoutSecs=3000, retryDelaySecs=2)
        inspect = h2o_cmd.runInspect(key=hex_key)
        print "numRows:", inspect['numRows']
        print "numCols:", inspect['numCols']
        inspect = h2o_cmd.runInspect(key=hex_key, offset=-1)
        print "inspect offset = -1:", h2o.dump_json(inspect)

        for execExpr in exprList:
            start = time.time()
            execResult, result = h2e.exec_expr(h2o.nodes[0], execExpr, resultKey=None, timeoutSecs=300)
            print 'exec took', time.time() - start, 'seconds'
            print "result:", result

        h2o.check_sandbox_for_errors()
github h2oai / h2o-2 / py / testdir_single_jvm / test_randomFilter.py View on Github external
def test_randomFilter(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()
        # use SEED so the file isn't cached?
        csvFilenameAll = [
            ('syn_1mx8_' + str(SEED) + '.csv', 'cA', 5),
            ]

        ### csvFilenameList = random.sample(csvFilenameAll,1)
        csvFilenameList = csvFilenameAll
        ### h2b.browseTheCloud()
        lenNodes = len(h2o.nodes)
        for (csvFilename, key2, timeoutSecs) in csvFilenameList:
            SEEDPERFILE = random.randint(0, sys.maxint)
            csvPathname = SYNDATASETS_DIR + '/' + csvFilename
            print "Creating random 1mx8 csv"
            write_syn_dataset(csvPathname, 1000000, SEEDPERFILE)
            # creates csvFilename.hex from file in importFolder dir 
            parseKey = h2o_cmd.parseFile(None, csvPathname, key2=key2, timeoutSecs=2000)

            print csvFilename, 'parse time:', parseKey['response']['time']
            print "Parse result['destination_key']:", parseKey['destination_key']
            inspect = h2o_cmd.runInspect(None, parseKey['destination_key'])
            h2o_cmd.infoFromInspect(inspect, csvPathname)

            print "\n" + csvFilename
            h2e.exec_zero_list(zeroList)
            # does n+1 so use maxCol 6
github h2oai / h2o-2 / py / testdir_multi_jvm / notest_exec2_multi_node.py View on Github external
def test_exec2_multi_node(self):
        for node in h2o.nodes:
            # get this key known to this node
            execExpr = "r0 = c(0); r1 = c(0); r2 = c(0);"
            print "Sending request to node: %s" % node
            h2e.exec_expr(node=node, execExpr=execExpr, timeoutSecs=30)

            # test the store expression
            execExpr = "(r1==0) ? c(0) : c(1)"
            print "Sending request to node: %s" % node
            h2e.exec_expr(node=node, execExpr=execExpr, timeoutSecs=30)

        global OUTSTANDING
        if not OUTSTANDING:
            OUTSTANDING = min(10, len(h2o.nodes))

        execTrial = 0
        worker_resultq = multiprocessing.Queue()
        while execTrial <= TRIALMAX:
            start = time.time()
            workers = []
            for o in range(OUTSTANDING):
                np = execTrial % len(h2o.nodes)
                retryDelaySecs = 5
                timeoutSecs = 60
                bucket = None
                csvPathname = None
                src_key = None
                hex_key = 'a'
                tmp = multiprocessing.Process(target=function_no_keyboard_intr,
                    args=(worker_resultq, execit, np, bucket, csvPathname, src_key, hex_key, timeoutSecs, retryDelaySecs))
github h2oai / h2o-2 / py / testdir_0xdata_only / test_hdfs_cdh4_fvec.py View on Github external
start = time.time()
            hex_key = "a.hex"
            csvPathname = "datasets/" + csvFilename
            parseResult = h2i.import_parse(path=csvPathname, schema='hdfs', hex_key=hex_key, header=0, timeoutSecs=1000)
            print "hdfs parse of", csvPathname, "took", time.time() - start, 'secs'

            start = time.time()
            print "Saving", csvFilename, 'to HDFS'

            print "Using /tmp2 to avoid the '.' prefixed files in /tmp2 (kills import)"
            print "Unique per-user to avoid permission issues"
            username = getpass.getuser()
            # reuse the file name to avoid running out of space
            csvPathname = "tmp2/a%s.%s.csv" % ('_h2o_export_files', username)

            path = "hdfs://"+ h2o.nodes[0].hdfs_name_node + "/" + csvPathname
            h2o.nodes[0].export_files(src_key=hex_key, path=path, force=1, timeoutSecs=timeoutSecs)
            print "export_files of", hex_key, "to", path, "took", time.time() - start, 'secs'
            trial += 1

            print "Re-Loading", csvFilename, 'from HDFS'
            start = time.time()
            hex_key = "a2.hex"
            time.sleep(2)
            d = h2i.import_only(path=csvPathname, schema='hdfs', timeoutSecs=1000)
            print h2o.dump_json(d)
            parseResult = h2i.import_parse(path=csvPathname, schema='hdfs', hex_key=hex_key, header=0, timeoutSecs=1000)
            print "hdfs re-parse of", csvPathname, "took", time.time() - start, 'secs'
github h2oai / h2o-2 / py / testdir_single_jvm / test_import_only_loop.py View on Github external
csvPathname = csvDirname + "/" + csvFilename
            trialStart = time.time()
            # import***************************************** 
            hex_key =  csvFilename + "_" + str(trial) + ".hex"
            start = time.time()
                
            # the import has to overwrite existing keys. no parse
            h2i.import_only(bucket='home-0xdiag-datasets', path=csvPathname, schema='put', hex_key=hex_key,
                timeoutSecs=timeoutSecs, retryDelaySecs=10, pollTimeoutSecs=120, doSummary=False)
            elapsed = time.time() - start
            print "import", trial, "end ", 'took', elapsed, 'seconds',\
                "%d pct. of timeout" % ((elapsed*100)/timeoutSecs)

            # STOREVIEW***************************************
            print "\nTrying StoreView after the import"
            for node in h2o.nodes:
                h2o_cmd.runStoreView(node=node, timeoutSecs=30, view=10000)

            # exec does read lock on all existing keys
            if DO_EXEC:
                # fails
                execExpr="A.hex=c(0,1)"
                # execExpr="A.hex=0;"
                h2e.exec_expr(execExpr=execExpr, timeoutSecs=20)
                h2o_cmd.runInspect(key='A.hex')

            print "\nTrying StoreView after the exec "
            h2o_cmd.runStoreView(timeoutSecs=30, view=10000)
            # for node in h2o.nodes:
            #    h2o_cmd.runStoreView(node=node, timeoutSecs=30, view=10000)

            print "Trial #", trial, "completed in", time.time() - trialStart, "seconds."
github h2oai / h2o-2 / py / testdir_single_jvm / test_rf_predict3_10pct_fvec.py View on Github external
def predict_and_compare_csvs(model_key, translate=None):
            start = time.time()
            predict = h2o.nodes[0].generate_predictions(model_key=model_key,
                data_key=hexKey, destination_key=predictHexKey)
            print "generate_predictions end on ", hexKey, " took", time.time() - start, 'seconds'
            h2o.check_sandbox_for_errors()
            inspect = h2o_cmd.runInspect(key=predictHexKey)
            h2o_cmd.infoFromInspect(inspect, 'predict.hex')

            h2o.nodes[0].csv_download(src_key=predictHexKey, csvPathname=csvPredictPathname)
            h2o.check_sandbox_for_errors()

            print "Do a check of the original output col against predicted output"
            (rowNum1, originalOutput) = compare_csv(csvFullname, col=-1,
                msg="Original", translate=translate, skipHeader=skipSrcHeader)
            (rowNum2, predictOutput)  = compare_csv(csvPredictPathname,  col=0,
                msg="Predicted", skipHeader=True)

            # both source and predict have headers, so no expected mismatch?
            expHeaderMismatch = 0 if skipSrcHeader else 1
            if ((rowNum1+expHeaderMismatch) != rowNum2):
                raise Exception("original rowNum1: %s + %s not same as downloaded predict rowNum2: %s" \
                % (rowNum1, expHeaderMismatch, rowNum2))

            wrong = 0
            for rowNum,(o,p) in enumerate(zip(originalOutput, predictOutput)):
github h2oai / h2o-2 / py / testdir_hosts / glm_bench_gaussian.py View on Github external
def parse_file(f):
    """Import file *f* into h2o via the first node and parse it.

    Returns the destination key of the parsed dataset.
    """
    node = h2o.nodes[0]
    imported = node.import_files(f)
    src_key = imported['succeeded'][0]['key']
    parse_result = node.parse(src_key, timeoutSecs=3600)
    return parse_result['destination_key']
github h2oai / h2o-2 / py / testdir_single_jvm / test_speedrf_covtype.py View on Github external
print "minLeaves:", treeStats['minLeaves']
            print "meanLeaves:", treeStats['meanLeaves']
            print "meanDepth:", treeStats['meanDepth']

            print "errs[0]:", errs[0]
            print "errs[-1]:", errs[-1]
            print "errs:", errs

            (classification_error, classErrorPctList, totalScores) = h2o_rf.simpleCheckRFView(rfv=rfView)
            # we iterate over params, so can't really do this check
            # self.assertAlmostEqual(classification_error, 0.03, delta=0.5, msg="Classification error %s differs too much" % classification_error)

            print "classErrorPctList:", classErrorPctList
            self.assertEqual(len(classErrorPctList), 7, "Should be 7 output classes, so should have 7 class error percentages from a reasonable predict")
            # FIX! should update this expected classification error
            predict = h2o.nodes[0].generate_predictions(model_key=model_key, data_key=data_key)

            eList.append(classErrorPctList[4])
            fList.append(trainElapsed)
            if DO_PLOT:
                if TRY == 'max_depth':
                    xLabel = 'max_depth'
                elif TRY == 'ntrees':
                    xLabel = 'ntrees'
                elif TRY == 'nbins':
                    xLabel = 'nbins'
                else:
                    raise Exception("huh? %s" % TRY)
                xList.append(paramDict[xLabel])

        if DO_PLOT:
            eLabel = 'class 4 pctWrong'
github h2oai / h2o-2 / py / testdir_hosts / mnist8m_RF_bench.py View on Github external
row.update({'trainViewTime':trainViewTime})
        
        h2o_rf.simpleCheckRFView(None, rfView, **kwargs)
        modelKey = rfView['model_key']
        
        #Test File Parsing#
        testParseWallStart = time.time()
        print "Testing file is: ", files['test']
        csvPathname = files['test']
        destKey = files['test'] + '.hex'
        parseKey = h2i.parseImportFolderFile(None,csvPathname,
                           importFolderPath,key2=destKey,
                           timeoutSecs=300,retryDelaySecs=5,pollTimeoutSecs=120)
        testParseWallTime = time.time() - testParseWallStart
        #End Test File Parse#
        inspect = h2o.nodes[0].inspect(parseKey['destination_key'])
        row.update({'nTestRows':inspect['num_rows']})
        row.update({'testParseWallTime':testParseWallTime})
        modelKey = rfView['model_key']
        
        #RFView (score on test)#
        kwargs = configs.copy()
        testRFStart = time.time()
        kwargs.update({'model_key':modelKey,'ntree':10})
        rfView = h2o_cmd.runRFView(data_key=destKey,timeoutSecs=180,
                                       doSimpleCheck=False,**kwargs)
        testViewTime = time.time() - testRFStart
        #End RFView (score on test)#
        pprint(rfView)
        errRate = rfView['confusion_matrix']['classification_error']
        row.update({'testViewTime':testViewTime})
        overallWallTime = time.time() - overallWallStart