How to use the toil.realtimeLogger.RealtimeLogger class in toil

To help you get started, we’ve selected a few toil examples based on popular ways it is used in public projects.

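Most of the snippets below use RealtimeLogger from worker-side job code, where it behaves like a standard logging.Logger whose messages are forwarded to the leader as soon as they are emitted; the DataBiosphere/toil example further down shows the leader-side half, where RealtimeLogger is entered as a context manager around the main loop. A minimal worker-side sketch (the count_lines job function and its path argument are hypothetical, not taken from the projects below):

from toil.realtimeLogger import RealtimeLogger

def count_lines(job, path):
    # Hypothetical Toil job function: report progress through the real-time
    # logger so the message shows up in the leader's log immediately.
    with open(path) as fh:
        n = sum(1 for _ in fh)
    RealtimeLogger.info("Counted {} lines in {}".format(n, path))
    return n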

github edraizen/molmimic: molmimic/generate_data/get_eppic_interactome.py (view on GitHub)
    if not force:
        keys = list(data_stores.eppic_interfaces.list_input_directory())

        all_files = list(data_stores.eppic_interfaces.list_input_directory("pdb"))
        done_pdbs = [f.split("/")[1] for f in all_files if "status.json" in f]

        # done_pdbs = []
        # for pdbId, files in groupby(data_stores.eppic_interfaces.list_input_directory(), lambda k: k.split("/")[1]):
        #     files = [os.path.splitext("".join(k.split("/")[2:]))[0] for k in files]
        #     if len(files) > 1 and "status.json" in files:
        #         done_pdbs.append(pdbId)

        total_size = len(pdb)
        pdb = pdb[~pdb["pdb"].isin(done_pdbs)]
        RealtimeLogger.info("Filtered CATH ({}/{} domains)".format(len(pdb), total_size))
    else:
        RealtimeLogger.info("Running CATH ({} domains)".format(len(pdb)))

    if split_groups:
        pdb = pdb.assign(group=pdb["pdb"].str[:3])
        pdb_groups = pdb.groupby("group")["pdb"].apply(list)
        map_job(job, process_pdb_group, pdb_groups, cathFileStoreID, further_parallelize)
    else:
        map_job(job, process_pdb, pdb["pdb"], cathFileStoreID)
github edraizen/molmimic: molmimic/generate_data/BioUnit.py (view on GitHub)
    for line in it:
        if line.startswith('BIOMOLECULE:'):
            if seenbiomolecule:
                break
            seenbiomolecule = True
        elif line.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            chains = [chain.strip() for chain in line[30:].split(',')]
        elif line.startswith('                   AND CHAINS:'):
            chains += [chain.strip() for chain in line[30:].split(',')]
        elif line.startswith('  BIOMT'):
            current_M = np.eye(3)
            current_t = np.zeros(3)

            for i in range(3):
                l = next(it) if i > 0 else line
                RealtimeLogger.info("LINE IS {}".format(l))
                row = int(l[7])
                num = int(l[8:12])
                # First three numbers are one row of the rotation matrix,
                # the last is the matching translation component.
                vec = list(map(float, l[12:].split()))
                current_M[i, :] = vec[:-1]
                current_t[i] = vec[-1]
            biomt.append((current_M.T, current_t))

    return biomt
github edraizen/molmimic: molmimic/parsers/container.py (view on GitHub)
            try:
                idx = self.params_to_update[k]
                param_func = self.param_funcs[idx]
            except KeyError as e:
                try:
                    idx = self.optional_params_to_update[k]
                    param_func = self.optional_param_funcs[idx]
                except KeyError as e:
                    #Invalid parameter, skip
                    RealtimeLogger.info("ignoring parameter {}".format(k))
                    continue

            val = param_func(k, v)

            if val is None:
                RealtimeLogger.info("ignoring parameter {} since val is None".format(k))
                continue

            try:
                formatter = self.parameter_formatters[k]
                if formatter is not None:
                    parameters[idx] = formatter.format(val)
                else:
                    parameters[idx] = None
            except KeyError:
                try:
                    formatter = self.optional_parameter_formatters[k]
                    if formatter is not None:
                        parameters[idx] = formatter.format(val)
                    else:
                        parameters[idx] = None
                except KeyError:
github edraizen/molmimic: molmimic/util/cath.py (view on GitHub)
"table",
            columns=cath_names,
            drop_duplicates=True,
            **cathcode)[cath_names]

        safe_remove(cath_file, warn=True)

    else:
        cathcodes = pd.DataFrame([cathcode], columns=cath_names)

    if cathcodes.shape[1] < level:
        map_job(job, run_cath_hierarchy, cathcodes.values.tolist(), func,
            cathFileStoreID, **kwds)
    else:
        sfams = (cathcodes.astype(int).astype(str)+"/").sum(axis=1).str[:-1].tolist()
        RealtimeLogger.info("Running sfam {}".format(cathcode))
        kwds.pop("further_parallelize", None)
        kwds.pop("level", None)
        if "cathCodeStoreID" in kwds:
            del kwds["cathCodeStoreID"]
        map_job(job, func, sfams, cathFileStoreID, **kwds)
github edraizen/molmimic: molmimic/util/iostore.py (view on GitHub)
def __connect(self):
        """
        Make sure we have an S3 Bucket connection, and set one up if we don't.
        Creates the S3 bucket if it doesn't exist.
        """

        if self.s3 is None:
            RealtimeLogger.debug("Connecting to bucket {} in region {}".format(
                self.bucket_name, self.region))
            print("Connecting to bucket {} in region {}".format(
                self.bucket_name, self.region))

            # Connect to the s3 bucket service where we keep everything
            self.s3 = boto3.client('s3', self.region, config=
                botocore.client.Config(signature_version='s3v4', retries={"max_attempts":20}))
            self.s3r = boto3.resource('s3', self.region, config=
                botocore.client.Config(signature_version='s3v4', retries={"max_attempts":20}))
            try:
                self.s3.head_bucket(Bucket=self.bucket_name)
            except Exception:
                # head_bucket failed; assume the bucket does not exist yet and create it
                if self.region == 'us-east-1':
                    self.s3.create_bucket(
                        Bucket=self.bucket_name,
                    )
github edraizen/molmimic: molmimic/util/toil.py (view on GitHub)
:param function func: Function to spawn dynamically, passes one sample as first argument
    :param list inputs: Array of samples to be batched
    :param list args: any arguments to be passed to the function
    """
    # num_partitions isn't exposed as an argument in order to be transparent to the user.
    # The value for num_partitions is a tested value
    num_partitions = 100
    partition_size = int(ceil(len(inputs)/num_partitions))
    if partition_size > 1:
        RealtimeLogger.info("MAP_JOB: total: {}; paritions_size: {}".format(
            len(inputs), partition_size
        ))
        for partition in partitions(inputs, partition_size):
            job.addChildJobFn(map_job, func, partition, *args, **kwds)
    else:
        RealtimeLogger.info("MAP_JOB: Running: {}".format(len(inputs)))
        for sample in inputs:
            job.addChildJobFn(func, sample, *args, **kwds)
github edraizen/molmimic: molmimic/parsers/container.py (view on GitHub)
                params.update(kwds)  # dict.update() returns None, so merge in place first
                kwds = params
            else:
                raise RuntimeError

        parameters = self.parameters[:]
        for k, v in kwds.items():
            try:
                idx = self.params_to_update[k]
                param_func = self.param_funcs[idx]
            except KeyError as e:
                try:
                    idx = self.optional_params_to_update[k]
                    param_func = self.optional_param_funcs[idx]
                except KeyError as e:
                    #Invalid parameter, skip
                    RealtimeLogger.info("ignoring parameter {}".format(k))
                    continue

            val = param_func(k, v)

            if val is None:
                RealtimeLogger.info("ignoring parameter {} since val is None".format(k))
                continue

            try:
                formatter = self.parameter_formatters[k]
                if formatter is not None:
                    parameters[idx] = formatter.format(val)
                else:
                    parameters[idx] = None
            except KeyError:
                try:
github DataBiosphere/toil: src/toil/common.py (view on GitHub)
def _runMainLoop(self, rootJob):
        """
        Runs the main loop with the given job.
        :param toil.job.Job rootJob: The root job for the workflow.
        :rtype: Any
        """
        logProcessContext(self.config)

        with RealtimeLogger(self._batchSystem,
                            level=self.options.logLevel if self.options.realTimeLogging else None):
            # FIXME: common should not import from leader
            from toil.leader import Leader
            return Leader(config=self.config,
                          batchSystem=self._batchSystem,
                          provisioner=self._provisioner,
                          jobStore=self._jobStore,
                          rootJob=rootJob,
                          jobCache=self._jobCache).run()
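The snippet above is the leader-side half of the mechanism: Toil wraps its main loop in RealtimeLogger as a context manager and only enables forwarding of worker messages when the workflow was launched with real-time logging turned on (the options.realTimeLogging check). A minimal sketch of a workflow that enables the feature; the hello job function and the ./jobstore path are made up for illustration:

from toil.common import Toil
from toil.job import Job
from toil.realtimeLogger import RealtimeLogger

def hello(job, name):
    # Hypothetical job: this message is streamed to the leader's log in real time
    RealtimeLogger.info("hello from the worker, {}".format(name))
    return name

if __name__ == "__main__":
    options = Job.Runner.getDefaultOptions("./jobstore")
    options.realTimeLogging = True  # same switch as --realTimeLogging on the command line
    options.logLevel = "INFO"
    with Toil(options) as workflow:
        workflow.start(Job.wrapJobFn(hello, "world"))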
github edraizen/molmimic: molmimic/common/featurizer.py (view on GitHub)
raise RuntimeError("Input must be Atom or Residue: {}".format(type(atom_or_residue)))

        if not hasattr(self, "_pqr") or (not only_charge and len(list(self._pqr.values())[0])==1):
            try:
                if only_charge:
                    pdb2pqr = Pdb2Pqr(work_dir=self.work_dir, job=self.job)
                    self._pqr = pdb2pqr.get_charge_from_pdb_file(self.path, with_charge=False)
                else:
                    apbs = APBS(work_dir=self.work_dir, job=self.job)
                    self._pqr = apbs.atom_potentials_from_pdb(self.path)
            except (SystemExit, KeyboardInterrupt):
                raise
            except Exception as e:
                RealtimeLogger.info("ELECTROSTATICS failed ({}): {}".format(type(e), e))
                self._pqr = {}

        atom_id = atom.get_full_id()[3:5]

        if atom_id[1][1] != " ":
            #pdb2pqr removes alternate conformations and only uses the first
            atom_id = (atom_id[0], (atom_id[1][0], " "))

        if only_charge:
            charge_value = self._pqr.get(atom_id, np.nan)
            electrostatic_pot_value = np.nan
        else:
            try:
                charge_value, electrostatic_pot_value = self._pqr[atom_id]
            except KeyError:
                charge_value, electrostatic_pot_value = np.nan, np.nan
github edraizen/molmimic: molmimic/generate_data/iostore.py (view on GitHub)
def __connect(self):
        """
        Make sure we have an Azure connection, and set one up if we don't.
        """

        if self.connection is None:
            RealtimeLogger.debug("Connecting to account {}, using "
                "container {} and prefix {}".format(self.account_name,
                self.container_name, self.name_prefix))

            # Connect to the blob service where we keep everything
            self.connection = BlobService(
                account_name=self.account_name, account_key=self.account_key)