How to use the h2o.find_file function in h2o

To help you get started, we’ve selected a few h2o.find_file examples based on popular ways it is used in public projects.


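Every snippet on this page follows the same idiom: h2o.find_file is given a path relative to the h2o-2 source checkout (a dataset under smalldata/, an R script, the built target/h2o.jar) and returns a filesystem path the test can use no matter which test directory it was launched from. Below is a minimal sketch of that usage, assuming the h2o-2 py/ test harness is importable; the two paths are only examples borrowed from the tests on this page.

import os
import h2o

# find_file resolves paths given relative to the h2o-2 checkout,
# so the same call works from any of the testdir_* directories.
csvPathname = h2o.find_file('smalldata/poker/poker-hand-testing.data')
jarPathname = h2o.find_file('target/h2o.jar')

print "dataset:", csvPathname, os.path.exists(csvPathname)
print "h2o jar:", jarPathname, os.path.exists(jarPathname)

The resolved path is then handed on exactly as in the examples that follow: to a parser (h2o_cmd.parseFile or h2i.import_parse), to a spawned process (h2o.spawn_cmd, h2o.spawn_cmd_and_wait), or to node.import_files.
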
github h2oai / h2o-2 / py / test_junit.py
def test_A_all_junit(self):
        try:
            h2o.build_cloud(node_count=2, java_heap_GB=2)

            # we don't have the port or ip configuration here
            # that util/h2o.py does? Keep this in synch with spawn_h2o there.
            # also don't have --nosigar here?
            (ps, stdout, stderr) = h2o.spawn_cmd('junit', [
                    'java',
                    '-Xms2G',
                    '-Xmx2G',
                    '-Dh2o.arg.ice_root='+h2o.tmp_dir('ice.'),
                    '-Dh2o.arg.name='+h2o.cloud_name(),
                    '-Dh2o.arg.ip='+h2o.get_ip_address(),
                    '-Dh2o.arg.port=54666',
                    '-ea', '-jar', h2o.find_file('target/h2o.jar'),
                    '-mainClass', 'org.junit.runner.JUnitCore',
                    # The all test suite
                    'water.suites.AllTestsSuite'
                   ])

            rc = ps.wait(None)
            out = file(stdout).read()
            err = file(stderr).read()
            if rc is None:
                ps.terminate()
                raise Exception("junit timed out.\nstdout:\n%s\n\nstderr:\n%s" % (out, err))
            elif rc != 0:
                raise Exception("junit failed.\nstdout:\n%s\n\nstderr:\n%s" % (out, err))

        finally:
            h2o.tear_down_cloud()
github h2oai / h2o-2 / py / testdir_hosts / test_rf_parity_fvec.py
def test_rf_parity_fvec(self):
        # Create a directory for the created dataset files. ok if already exists
        SYNDATASETS_DIR = h2o.make_syn_dir()
        # always match the run below!
        print "\nGenerating some large row count parity datasets in", SYNDATASETS_DIR,
        print "\nmay be a minute.........."
        for x in xrange (161,240,20):
            # more rows!
            y = 10000 * x
            # Have to split the string out to list for pipe
            shCmdString = "perl " + h2o.find_file("syn_scripts/parity.pl") + " 128 4 "+ str(y) + " quad " + SYNDATASETS_DIR
            # FIX! as long as we're doing a couple, you'd think we wouldn't have to 
            # wait for the last one to be gen'ed here before we start the first below.
            # UPDATE: maybe EC2 takes a long time to spawn a process?
            h2o.spawn_cmd_and_wait('parity.pl', shCmdString.split(), timeout=90)
            # the algorithm for creating the path and filename is hardwired in parity.pl, i.e.
            csvFilename = "parity_128_4_" + str(x) + "_quad.data"  
            sys.stdout.write('.')
            sys.stdout.flush()
        print "\nDatasets generated. Using."

        # always match the gen above!
        # Let's try it a few times!
        for trials in xrange(1, 7):
            # prime
            trees = 2
github h2oai / h2o-2 / py / testdir_single_jvm / test_R_KMeans_basic.py
def test_R_B_kmeans_benign(self):
        print "\nStarting benign.csv"
        rScript = h2o.find_file('R/tests/test_R_B_kmeans_benign.R')
        rLibrary = h2o.find_file('R/h2o-package/R/H2O.R')

        # Run k-means with k = 3 on all columns
        # Loop to see if we get same centers
        shCmdString = "R -f " + rScript + " --args " + rLibrary + " " + h2o.nodes[0].http_addr + ":" + str(h2o.nodes[0].port)

        (ps, outpath, errpath) =  h2o.spawn_cmd('rtest_with_h2o', shCmdString.split())
        h2o.spawn_wait(ps, outpath, errpath, timeout=10)
github h2oai / h2o-2 / py / testdir_single_jvm / test_R_RF_basic.py
def test_R_RF_diff_class(self):
        print "\nStarting iris.csv class weight test"
        rScript = h2o.find_file('R/tests/test_R_RF_diff_class.R')
        rLibrary = h2o.find_file('R/H2O_Load.R')

        # Compare results from different class weights
        shCmdString = "R -f " + rScript + " --args " + rLibrary + " " + h2o.nodes[0].http_addr + ":" + str(h2o.nodes[0].port)
        
        (ps, outpath, errpath) =  h2o.spawn_cmd('rtest_with_h2o', shCmdString.split())
        rc = h2o.spawn_wait(ps, outpath, errpath, timeout=10)
        if(rc != 0): raise Exception("R exited with non-zero return code %s" % rc)
github h2oai / h2o-2 / py / testdir_single_jvm / test_R_RF_basic.py
def test_R_RF_diff_ignore(self):
        print "\nStarting iris.csv ignore predictor(s) test"
        rScript = h2o.find_file('R/tests/test_R_RF_diff_ignore.R')
        rLibrary = h2o.find_file('R/H2O_Load.R')

        # Ignore successively more predictor columns
        shCmdString = "R -f " + rScript + " --args " + rLibrary + " " + h2o.nodes[0].http_addr + ":" + str(h2o.nodes[0].port)
        
        (ps, outpath, errpath) =  h2o.spawn_cmd('rtest_with_h2o', shCmdString.split())
        rc = h2o.spawn_wait(ps, outpath, errpath, timeout=10)
        if(rc != 0): raise Exception("R exited with non-zero return code %s" % rc)
github h2oai / h2o-2 / py / testdir_multi_jvm / test_1ktrees_job_cancel_many.py
def test_1ktrees_job_cancel_many(self):
        SYNDATASETS_DIR = h2o.make_syn_dir()

        # always match the run below!
        # just using one file for now
        for x in [1000]:
            shCmdString = "perl " + h2o.find_file("syn_scripts/parity.pl") + " 128 4 "+ str(x) + " quad " + SYNDATASETS_DIR
            h2o.spawn_cmd_and_wait('parity.pl', shCmdString.split(),4)
            csvFilename = "parity_128_4_" + str(x) + "_quad.data"  

        csvFilename = "parity_128_4_" + str(1000) + "_quad.data"  
        csvPathname = SYNDATASETS_DIR + '/' + csvFilename
        hex_key = csvFilename + ".hex"
        parseResult = h2o_cmd.parseResult = h2i.import_parse(path=csvPathname, schema='put', hex_key=hex_key, timeoutSecs=30)

        print "kick off jobs, then cancel them"
        for trial in range(1, 50):
            # random delay between 0 and 1 seconds
            delay = random.uniform(0, 1)
            time.sleep(delay)

            h2o.verboseprint("Trial", trial)
            start = time.time()
github h2oai / h2o-2 / py / testdir_single_jvm / test_import_file.py
        cAll = [
            'smalldata/jira/v-3.csv',
            'smalldata/jira/v-3.csv',
            'smalldata/jira/v-3.csv',
            ]

        # pop open a browser on the cloud
        # h2b.browseTheCloud()

        for c in cAll:

            for i in range(10):
                # interesting. this will pass ../.. to h2o, which is legal for h2o,
                # but if this is run on a remote machine we were assuming matching absolute paths,
                # not paths relative to the current wd. I suppose we should test ../.. relative paths,
                # but that would be local machine only (meaning you can't use this with -cj config.json?)
                csvPathname = h2o.find_file('smalldata/jira/v-3.csv')
                # http://172.16.2.222:54321/2/ImportFiles2.json?path=../../smalldata/jira/v-3.csv

                # race between remove and import?
                h2o.nodes[0].remove_all_keys()
                importResult = h2o.nodes[0].import_files(csvPathname, timeoutSecs=15)
                h2o.verboseprint(h2o.dump_json(importResult))
                files = importResult['files']
                keys = importResult['keys']
                fails = importResult['fails']
                dels = importResult['dels']

                if len(files) == 0:
                    raise Exception("empty files: %s after import" % files)
                if len(keys) == 0:
                    raise Exception("empty keys: %s after import" % keys)
                if len(fails) != 0:
                    raise Exception("non-empty fails: %s after import" % fails)
github h2oai / h2o-2 / py / testdir_single_jvm / test_small_parse_sequential_same_dest.py
def test_small_parse_overlap_same_dest(self):
        noPoll = False
        timeoutSecs = 180
        num_trials = 0
        trial_max = 100
        while num_trials < trial_max:
            num_trials += 1
            csvPathname = h2o.find_file('smalldata/poker')
            csvFilename = csvPathname + '/' + 'poker-hand-testing.data'
            key = csvFilename
            key2 = csvFilename + '.hex'
            parseKey = h2o_cmd.parseFile(csvPathname=csvFilename,
                key=key, key2=key2, timeoutSecs=timeoutSecs, noPoll=noPoll,
                doSummary=False)
github h2oai / h2o-2 / py / testdir_multi_jvm / test_putfile_fvec.py
def file_to_put():
# TODO: handle command line options to allow putting an arbitrary file
    return h2o.find_file('smalldata/poker/poker-hand-testing.data')
github h2oai / h2o-2 / py / testdir_single_jvm / test_big_parse_overlap_same_dest_del_nopoll.py
def test_big_parse_overlap_same_dest_del_nopoll(self):
        noPoll = True
        timeoutSecs = 180
        num_trials = 0
        trial_max = 100
        while num_trials < trial_max:
            num_trials += 1
            csvPathname = h2o.find_file('smalldata/mnist')
            csvFilename = csvPathname + '/' + 'mnist8m-test-1.csv'
            key = csvFilename
            key2 = csvFilename + '.hex'
            parseKey = h2o_cmd.parseFile(csvPathname=csvFilename, 
                key=key, key2=key2, timeoutSecs=timeoutSecs, noPoll=noPoll,
                doSummary=False)
            node = h2o.nodes[0]
            node.remove_key(key)
            node.remove_key(key2)