How to use the toil.realtimeLogger.RealtimeLogger.info function in toil

To help you get started, we’ve selected a few toil examples based on popular ways the function is used in public projects.

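RealtimeLogger.info(message) relays a log message from the worker it runs on back to the Toil leader, which prints it while the workflow is still executing, which is why the projects below use it for progress and debugging output inside job functions. Here is a minimal, self-contained sketch of the pattern; the ./jobstore path, the report_progress function, and the message text are illustrative placeholders rather than code from the projects quoted below. Real-time logging must be enabled (the realTimeLogging option, or --realTimeLogging on the command line) for messages to reach the leader.

# A minimal sketch of calling RealtimeLogger.info from inside a Toil job.
# "./jobstore" and the "example-input" argument are placeholders.
from toil.common import Toil
from toil.job import Job
from toil.realtimeLogger import RealtimeLogger

def report_progress(job, name):
    # Relayed to the leader and printed while the workflow is still
    # running, instead of appearing only in the collected job log.
    RealtimeLogger.info("processing {}".format(name))

if __name__ == "__main__":
    options = Job.Runner.getDefaultOptions("./jobstore")
    options.logLevel = "INFO"
    options.realTimeLogging = True  # without this, messages are not relayed
    with Toil(options) as workflow:
        workflow.start(Job.wrapJobFn(report_progress, "example-input"))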

github edraizen / molmimic / molmimic / generate_data / get_inferred_structural_interactome.py
    # except (SystemExit, KeyboardInterrupt):
    #     raise
    # except:
    #     try:
    #         observed_interactome = filter_hdf_chunks("IBIS_observed.h5", "table", "obs_int_id", float(mol_sfam_id))
    #     except (SystemExit, KeyboardInterrupt):
    #         raise
    #     except:
    #         job.log("Failed reading IBIS_observed.h5")
    #         return
    sfamFileStoreID = sfamFileStoreIDs[mol_sfam_id]
    obsFilePath = get_file(job, "{}_obs.h5".format(int(mol_sfam_id)),
        sfamFileStoreID, work_dir=work_dir)

    observed_interactome = pd.read_hdf(obsFilePath, "table")
    RealtimeLogger.info("Obs has {} rows".format(observed_interactome.shape))

    # obsFilePath = os.path.join(work_dir, "{0}.observed_interactome".format(int(mol_sfam_id)))
    # out_store.read_input_file("{0}/{0}.observed_interactome".format(int(mol_sfam_id)), obsPath)

    tableInfPath = get_file(job, "IBIS_inferred_{}.h5".format(table), tableInfStoreID)
    # skip_int = set([tuple(map(int, os.path.basename(f)[:-3].split("_"))) for f in out_store.list_input_directory(
    #     "{}/_infrows/Intrac{}".format(int(mol_sfam_id),  table)) if f.endswith(".h5")])
    try:
        inf_int_ids = filter_hdf_chunks(tableInfPath, "Intrac{}".format(table), chunksize=100,
            nbr_superfam_id=mol_sfam_id)
    except (RuntimeError, TypeError):
        job.log("Unable to find sfam {} in table {}, Skipping".format(mol_sfam_id, table))
        return

    #inf_int_ids = set([tuple(row) for row in inf_int_ids.itertuples()])
    #inf_int_ids -= skip_int
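In this job, the info call reports the size of the observed-interactome table immediately after it is read back from the file store, a cheap sanity check that is visible in the leader log while the workflow runs.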
github edraizen / molmimic / molmimic / generate_data / calculate_features.py
    else:
        #pdb_or_key is key
        assert pdb_or_key.count("_") == 3
        key = os.path.splitext(pdb_or_key)[0]
        pdb, chain, sdi, domNo = os.path.basename(key).split("_")
        sdi, domNo = sdi[3:], domNo[1:]

    try:
        pdb_path = os.path.join(work_dir, os.path.basename(key)+".pdb")
        in_store.read_input_file(key+".pdb", pdb_path)

        s = ProteinFeaturizer(pdb_path, pdb, chain, sdi=sdi, domNo=domNo,
            work_dir=work_dir, job=job)

        _, atom_features = s.calculate_flat_features()
        RealtimeLogger.info("Finished atom features")
        _, residue_features = s.calculate_flat_features(course_grained=True)
        RealtimeLogger.info("Finished residue features")
        graph_features = s.calculate_graph()
        RealtimeLogger.info("Finished edge features")

        out_store.write_output_file(atom_features, key+"_atom.npy")
        out_store.write_output_file(residue_features, key+"_residue.npy")
        out_store.write_output_file(graph_features, key+"_edges.gz")

        for f in (pdb_path, atom_features, residue_features, graph_features):
            try:
                os.remove(f)
            except OSError:
                pass
    except (SystemExit, KeyboardInterrupt):
        raise
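The three info calls bracket the successive featurization stages (atom, residue, and edge features), so the leader log shows which stage a long-running worker has reached.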
github edraizen / molmimic / molmimic / parsers / json.py
            rerun = True

            try:
                os.remove(fname)
            except (OSError, FileNotFoundError):
                pass

            RealtimeLogger.info("Donwlaod step 6 {}".format(rerun))    

            if rerun and attempts > 0:
                return self.get(key, attempts=attempts-1, last_source=source)
            else:
                RealtimeLogger.info("Not restarting")
                raise KeyError("Key '{}' is an invalid file".format(key))
        except Exception as e:
            RealtimeLogger.info("API Failed parsing json ({}): {}".format(type(e), e))
            raise KeyError("Key '{}' Not found; {} is an invalid file".format(key, fname))

        if key not in self.files:
            self.files[key] = (fname, should_remove)

        if should_remove and self._clean:
            self.clean()

        return result
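Throughout this download helper, info messages mark each step of the fetch-and-parse cycle and record whether a retry is about to happen, making a repeatedly failing key traceable from the leader log alone.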
github edraizen / molmimic / molmimic / generate_data / calculate_ddi.py
        print(mol_sfam, int_sfam)
        #if i<30: continue
        # and (max_sfams is None or num_ddi
github edraizen / molmimic / molmimic / generate_data / ibis_old / cluster_proteins.py
        # Save all domains to fasta
        domain_ids = {}
        with open(domain_fasta, "w") as fasta:
            if jobStoreIDs is not None:
                for pdb_fname, jobStoreID in jobStoreIDs:
                    pdb_file = os.path.join(work_dir, pdb_fname)
                    job.fileStore.readGlobalFile(jobStoreID, userPath=pdb_file+".tmp")
                    remove_ter_lines(pdb_file+".tmp", pdb_file)
                    with open(pdb_file) as f:
                        pdb2seq(pdb_fname, sfam_id, f, fasta)
                        domain_ids[pdb_fname] = pdb_file
            else:
                for i, key in enumerate(in_store.list_input_directory(str(int(sfam_id)))):
                    if not key.endswith(".pdb"): continue
                    if i%10==0:
                        RealtimeLogger.info("{} {}".format(i, key))
                    fname = os.path.basename(key)
                    try:
                        in_store.read_input_file(key, fname)
                    except (KeyboardInterrupt, SystemExit):
                        raise
                    except Exception as e:
                        continue

                    with open(fname) as f:
                        pdb2seq(fname, sfam_id, f, fasta)
                        domain_ids[fname] = fname

                    try:
                        os.remove(fname)
                    except OSError:
                        pass
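Note the i % 10 == 0 guard: logging only every tenth key throttles the real-time channel when iterating over a large directory listing.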
github ComparativeGenomicsToolkit / cactus / src / cactus / shared / common.py
def cactus_realtime_log_info(msg, max_len = 1000):
    if len(msg) > max_len:
        msg = msg[:max_len] + "..."
    RealtimeLogger.info("{}: {}".format(datetime.now(), msg))
github edraizen / molmimic / molmimic / parsers / json.py
            with open(fname) as f:
                pass
        except IOError as e:
            #Might be empty
            try:
                os.remove(fname)
            except OSError:
                pass

            RealtimeLogger.info("Failed reading, {} bc {}".format(fname, e))

            if attempts > 0:
                return self.get(key, attempts=attempts-1, last_source=source)
            else:
                raise KeyError("Key '{}' is an invalid file".format(key))
        RealtimeLogger.info("Donwlaod step 5")
        try:
            result = self.parse(fname, key)
        except (SystemExit, KeyboardInterrupt) as e:
            raise
        except ValueError as e:
            rerun = False
            try:
                with open(fname) as f:
                    for line in f:
                        rerun = self.check_line(key, line, attempts)
            except Exception:
                rerun = True

            try:
                os.remove(fname)
            except (OSError, FileNotFoundError):
github edraizen / molmimic / molmimic / generate_data / ibis_old / cluster_proteins.py
import itertools as it
import glob
import re
from collections import defaultdict

from toil.realtimeLogger import RealtimeLogger

try:
    import pandas as pd
    from molmimic.generate_data.iostore import IOStore
    from molmimic.generate_data.job_utils import map_job
    from molmimic.generate_data.util import get_file, PDB_TOOLS, izip_missing, get_pdb_residues, natural_keys, remove_ter_lines
    from molmimic.parsers.USEARCH import run_usearch
    from molmimic.parsers import superpose
except ImportError as e:
    RealtimeLogger.info(e)

def merge_all_sfam_clusters(job, sfam_id, interaction_type, id):
    assert interaction_type in ("observed", "inferred")
    work_dir = job.fileStore.getLocalTempDir()
    cluster_store = IOStore.get("aws:us-east-1:molmimic-clustered-structures")

    cluster_interfaces = None

    to_delete = []
    for cluster_key in cluster_store.list_input_directory("{}/interface_clusters_{}_{}".format(sfam_id, id, interaction_type)):
        cluster_file = os.path.join(work_dir, os.path.basename(cluster_key))

        try:
            cluster_store.read_input_file(cluster_key, cluster_file)
        except (KeyboardInterrupt, SystemExit):
            raise
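Wrapping the module-level imports in try/except and reporting any ImportError through RealtimeLogger.info, as at the top of this file, surfaces a missing dependency on a worker in the leader log instead of as an opaque job failure.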
github edraizen / molmimic / molmimic / parsers / eppic.py
    def check_line(self, key, line, attempts):
        RealtimeLogger.info("EPPIC Failed {}".format(line))
        rerun = False
        if "" in line or "" in line or "HTTP 404 Not Found" in line:
            RealtimeLogger.info("Is restarting")
            self.store.remove_file(key)
            rerun = attempts > 0

        if "Too many submissions" in line:
            RealtimeLogger.info("Is changing IP")
            try:
                reset_ip()
            except (SystemExit, KeyboardInterrupt):
                raise
            except:
                pass

        try:
            RealtimeLogger.info("Start line {}".format(line))
            result = json.loads(line)
            RealtimeLogger.info("Result {}".format(result))
            if len(result) == 1 and isinstance(result[0], dict) and "uid" in result[0]:
                rerun = True
        except (SystemExit, KeyboardInterrupt):
            raise
        except:
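In this response checker, the info calls echo each suspect line and the JSON parsed from it, turning the leader log into a trace of why a download is marked for rerun.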