import os

# Assumes project-level helpers (get_interfaces_path, iter_cdd, observed_bsa,
# inferred_bsa) are importable from the surrounding package.
def start_toil(job, dataset_name, name="bsa"):
    path = os.path.join(get_interfaces_path(dataset_name), "by_superfamily")
    for cdd, sfam_id in iter_cdd(use_id=True, group_superfam=True):
        sfam_path = os.path.join(path, str(int(sfam_id)), str(int(sfam_id)))

        # Only schedule BSA jobs for superfamilies whose interactomes exist.
        if not os.path.isfile(sfam_path + ".observed_interactome"):
            continue
        cjob = job.addChildJobFn(observed_bsa, dataset_name, sfam_id)

        if not os.path.isfile(sfam_path + ".inferred_interactome"):
            continue
        cjob.addFollowOnJobFn(inferred_bsa, dataset_name, sfam_id)
if __name__ == "__main__":
from toil.common import Toil
from toil.job import Job
parser = Job.Runner.getDefaultArgumentParser()
options = parser.parse_args()
options.logLevel = "DEBUG"
options.clean = "always"
dataset_name = options.jobStore.split(":")[-1]
job = Job.wrapJobFn(start_toil, dataset_name)
with Toil(options) as toil:
toil.start(job)
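For orientation, a minimal self-contained sketch of the scheduling pattern used above (the job names here are hypothetical): in Toil, a child job runs after its parent finishes, and a follow-on attached to the child runs only once that child completes.

from toil.common import Toil
from toil.job import Job

def parent(job):
    child = job.addChildJobFn(step, "observed")   # runs after parent returns
    child.addFollowOnJobFn(step, "inferred")      # runs after the child finishes

def step(job, label):
    job.log("running {} step".format(label))

if __name__ == "__main__":
    opts = Job.Runner.getDefaultArgumentParser().parse_args(["file:demo-jobstore"])
    with Toil(opts) as toil:
        toil.start(Job.wrapJobFn(parent))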
# except (SystemExit, KeyboardInterrupt):
#     raise
# except:
#     try:
#         observed_interactome = filter_hdf_chunks("IBIS_observed.h5", "table",
#                                                  "obs_int_id", float(mol_sfam_id))
#     except (SystemExit, KeyboardInterrupt):
#         raise
#     except:
#         job.log("Failed reading IBIS_observed.h5")
#         return
sfamFileStoreID = sfamFileStoreIDs[mol_sfam_id]
obsFilePath = get_file(job, "{}_obs.h5".format(int(mol_sfam_id)),
                       sfamFileStoreID, work_dir=work_dir)
observed_interactome = pd.read_hdf(obsFilePath, "table")
RealtimeLogger.info("Obs has {} rows".format(observed_interactome.shape[0]))
# obsFilePath = os.path.join(work_dir, "{0}.observed_interactome".format(int(mol_sfam_id)))
# out_store.read_input_file("{0}/{0}.observed_interactome".format(int(mol_sfam_id)), obsPath)
tableInfPath = get_file(job, "IBIS_inferred_{}.h5".format(table), tableInfStoreID)
# skip_int = set([tuple(map(int, os.path.basename(f)[:-3].split("_")))
#                 for f in out_store.list_input_directory(
#                     "{}/_infrows/Intrac{}".format(int(mol_sfam_id), table))
#                 if f.endswith(".h5")])
try:
    inf_int_ids = filter_hdf_chunks(tableInfPath, "Intrac{}".format(table),
                                    chunksize=100, nbr_superfam_id=mol_sfam_id)
except (RuntimeError, TypeError):
    job.log("Unable to find sfam {} in table {}; skipping".format(mol_sfam_id, table))
    return
# inf_int_ids = set([tuple(row) for row in inf_int_ids.itertuples()])
# inf_int_ids -= skip_int
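filter_hdf_chunks is project code; as a sketch of the pattern it presumably wraps, pandas can stream a table-format HDF5 file in chunks and keep only the rows matching given column values (the helper name and behavior below are assumptions, not the project's actual implementation):

import pandas as pd

def filter_hdf_by_columns(path, key, chunksize=100, **column_values):
    # Stream the HDF5 table in chunks, keeping rows where each column == value.
    matches = []
    for chunk in pd.read_hdf(path, key, chunksize=chunksize):
        mask = pd.Series(True, index=chunk.index)
        for col, val in column_values.items():
            mask &= chunk[col] == val
        if mask.any():
            matches.append(chunk[mask])
    return pd.concat(matches) if matches else pd.DataFrame()

# e.g. filter_hdf_by_columns(tableInfPath, "Intrac{}".format(table), nbr_superfam_id=mol_sfam_id)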
else:
    # pdb_or_key is a full key of the form <pdb>_<chain>_sdi<sdi>_d<domNo>
    assert pdb_or_key.count("_") == 3
    key = os.path.splitext(pdb_or_key)[0]
    pdb, chain, sdi, domNo = os.path.basename(key).split("_")
    sdi, domNo = sdi[3:], domNo[1:]

try:
    pdb_path = os.path.join(work_dir, os.path.basename(key) + ".pdb")
    in_store.read_input_file(key + ".pdb", pdb_path)

    s = ProteinFeaturizer(pdb_path, pdb, chain, sdi=sdi, domNo=domNo,
                          work_dir=work_dir, job=job)

    _, atom_features = s.calculate_flat_features()
    RealtimeLogger.info("Finished atom features")
    _, residue_features = s.calculate_flat_features(course_grained=True)
    RealtimeLogger.info("Finished residue features")
    graph_features = s.calculate_graph()
    RealtimeLogger.info("Finished edge features")

    out_store.write_output_file(atom_features, key + "_atom.npy")
    out_store.write_output_file(residue_features, key + "_residue.npy")
    out_store.write_output_file(graph_features, key + "_edges.gz")

    # Best-effort cleanup of local copies once results are in the output store.
    for f in (pdb_path, atom_features, residue_features, graph_features):
        try:
            os.remove(f)
        except OSError:
            pass
except (SystemExit, KeyboardInterrupt):
    raise
except Exception:
    raise
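A note on the featurizer calls above: each result is unpacked as _, path, and the second element is handed to both write_output_file() and os.remove(), so it is evidently a local file path rather than an in-memory array. Inferred shape of the API (not a documented signature):

# Inferred from usage above; the actual ProteinFeaturizer API may differ.
_, atom_path = s.calculate_flat_features()                        # atom-level features on disk
_, residue_path = s.calculate_flat_features(course_grained=True)  # residue-level features
edge_path = s.calculate_graph()                                   # edge list on disk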
assert pdb_or_key.count("_") == 3
key = os.path.splitext(pdb_or_key)[0]
pdb, chain, sdi, domNo = os.path.basename(key).split("_")
sdi, domNo = sdi[3:], domNo[1:]
try:
pdb_path = os.path.join(work_dir, os.path.basename(key)+".pdb")
in_store.read_input_file(key+".pdb", pdb_path)
s = ProteinFeaturizer(pdb_path, pdb, chain, sdi=sdi, domNo=domNo,
work_dir=work_dir, job=job)
_, atom_features = s.calculate_flat_features()
RealtimeLogger.info("Finished atom features")
_, residue_features = s.calculate_flat_features(course_grained=True)
RealtimeLogger.info("Finished residue features")
graph_features = s.calculate_graph()
RealtimeLogger.info("Finished edge features")
out_store.write_output_file(atom_features, key+"_atom.npy")
out_store.write_output_file(residue_features, key+"_residue.npy")
out_store.write_output_file(graph_features, key+"_edges.gz")
for f in (pdb_path, atom_features, residue_features, graph_features):
try:
os.remove(f)
except OSError:
pass
except (SystemExit, KeyboardInterrupt):
raise
except Exception as e:
raise
# Imports required by this fragment (legacy boto 2 API).
import os
import boto
from boto.s3.key import Key

bucket_name = s3_dir.split('/')[0]
bucket_dir = '/'.join(s3_dir.split('/')[1:])

# I/O: fetch the tarball from the job store into the work dir
uuid_tar = return_input_paths(job, work_dir, ids, 'uuid.tar.gz')

# Upload to S3
conn = boto.connect_s3()
bucket = conn.get_bucket(bucket_name)
k = Key(bucket)
k.key = os.path.join(bucket_dir, uuid + '.tar.gz')
k.set_contents_from_filename(uuid_tar)
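For reference, the same upload in boto3 is a single call (an alternative sketch, not part of the original script; bucket_name, bucket_dir, uuid, and uuid_tar are as defined above):

import os
import boto3

s3 = boto3.client("s3")
s3.upload_file(uuid_tar, bucket_name, os.path.join(bucket_dir, uuid + ".tar.gz"))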
if __name__ == "__main__":
# Define Parser object and add to toil
parser = build_parser()
Job.Runner.addToilOptions(parser)
args = parser.parse_args()
# Store input_URLs for downloading
inputs = {'config': args.config,
'unc.bed': args.unc,
'hg19.transcripts.fa': args.fasta,
'composite_exons.bed': args.composite_exons,
'normalize.pl': args.normalize,
'output_dir': args.output_dir,
'rsem_ref.zip': args.rsem_ref,
'chromosomes.zip': args.chromosomes,
'ebwt.zip': args.ebwt,
'ssec': args.ssec,
's3_dir': args.s3_dir,
'uuid': None,
'samples.zip': None,
                        help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                             '\nDefault value: "%(default)s"')
parser_run.add_argument('--fq', default=None, type=str,
                        help='URL for the sample BAM. URLs can take the form: http://, file://, s3://, '
                             'and gnos://. The UUID for the sample must be given with the "--uuid" flag.')
parser_run.add_argument('--uuid', default=None, type=str,
                        help='Provide the UUID of a sample when using the "--bam" option.')
# If no arguments provided, print full help menu
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)
writeToDebug('Debug log')
Job.Runner.addToilOptions(parser)
args = parser.parse_args()

cwd = os.getcwd()
if args.command == 'generate-config' or args.command == 'generate':
    generate_file(os.path.join(cwd, 'config-toil-defuse.yaml'), generate_config)
if args.command == 'generate-manifest' or args.command == 'generate':
    generate_file(os.path.join(cwd, 'manifest-toil-defuse.tsv'), generate_manifest)
if 'generate' in args.command:
    sys.exit()
if args.command == 'run':
    # Read in the config YAML file
    config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()}
    check_for_required_parameters(config)
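The iteritems() call marks this as Python 2 code, and calling yaml.load without an explicit Loader is deprecated in current PyYAML. A Python 3 equivalent of the same config read would be:

import yaml

with open(args.config) as f:
    config = {k.replace('-', '_'): v for k, v in yaml.safe_load(f).items()}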
# Program checks
                        help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                             '\nDefault value: "%(default)s"')
parser_run.add_argument('--normal', default=None, type=str,
                        help='URL for the normal BAM. URLs can take the form: http://, ftp://, file://, s3://, '
                             'and gnos://. The UUID for the sample must be given with the "--uuid" flag.')
parser_run.add_argument('--tumor', default=None, type=str,
                        help='URL for the tumor BAM. URLs can take the form: http://, ftp://, file://, s3://, '
                             'and gnos://. The UUID for the sample must be given with the "--uuid" flag.')
parser_run.add_argument('--uuid', default=None, type=str,
                        help='Provide the UUID of a sample when using the "--tumor" and "--normal" options.')
# If no arguments provided, print full help menu
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)
# Add Toil options
Job.Runner.addToilOptions(parser_run)
args = parser.parse_args()

# Parse subparsers related to generation of config and manifest
cwd = os.getcwd()
if args.command == 'generate-config' or args.command == 'generate':
    generate_file(os.path.join(cwd, 'config-toil-exome.yaml'), generate_config)
if args.command == 'generate-manifest' or args.command == 'generate':
    generate_file(os.path.join(cwd, 'manifest-toil-exome.tsv'), generate_manifest)
# Pipeline execution
elif args.command == 'run':
    require(os.path.exists(args.config), '{} not found. Please run '
            '"toil-exome generate-config"'.format(args.config))
    if args.normal or args.tumor or args.uuid:
        require(args.normal and args.tumor and args.uuid,
                '"--tumor", "--normal" and "--uuid" must all be supplied')
        samples = [[args.uuid, args.normal, args.tumor]]
    else:
        samples = parse_manifest(args.manifest)
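The require() helper used in these entry points comes from the pipelines' support library and acts as a guard clause, roughly along these lines (the exception type is an assumption):

class UserError(Exception):
    """Assumed project-defined error for invalid user input."""

def require(expression, message):
    # Guard clause: raise a user-facing error rather than using assert,
    # so the message survives python -O.
    if not expression:
        raise UserError(message)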
                        '\nDefault value: "%(default)s"')
group.add_argument('--manifest', default='manifest-toil-rnaseq.tsv', type=str,
                   help='Path to the (filled in) manifest file, generated with "generate-manifest". '
                        '\nDefault value: "%(default)s"')
group.add_argument('--samples', default=None, nargs='+', type=str,
                   help='Space delimited sample URLs (any number). Samples must be tarfiles/tarballs that contain '
                        'fastq files. URLs follow the format: http://foo.com/sample.tar, '
                        'file:///full/path/to/file.tar. The UUID for the sample will be derived from the file. '
                        'Samples passed in this way will be assumed to be paired end; if using single-end data, '
                        'please use the manifest option.')
# If no arguments provided, print full help menu
if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(1)
# Add Toil options
Job.Runner.addToilOptions(parser_run)
args = parser.parse_args()

# Parse subparsers related to generation of config and manifest
cwd = os.getcwd()
if args.command == 'generate-config' or args.command == 'generate':
    generate_file(os.path.join(cwd, 'config-toil-rnaseq.yaml'), generate_config)
if args.command == 'generate-manifest' or args.command == 'generate':
    generate_file(os.path.join(cwd, 'manifest-toil-rnaseq.tsv'), generate_manifest)
# Pipeline execution
elif args.command == 'run':
    require(os.path.exists(args.config), '{} not found. Please run '
            '"toil-rnaseq generate-config"'.format(args.config))
    if not args.samples:
        require(os.path.exists(args.manifest), '{} not found and no samples provided. Please '
                'run "toil-rnaseq generate-manifest"'.format(args.manifest))
        samples = parse_samples(path_to_manifest=args.manifest)
    else:
        # Truncated in the source; by symmetry with the manifest branch this is
        # assumed to parse the URLs passed via --samples.
        samples = parse_samples(sample_urls=args.samples)
        j2.addFollowOnJobFn(create_data_loader, dataset_name, cdd)
        j2.addFollowOnJobFn(convert_pdb_to_mmtf, dataset_name, cdd)

if __name__ == "__main__":
    from toil.common import Toil
    from toil.job import Job

    parser = Job.Runner.getDefaultArgumentParser()
    options = parser.parse_args()
    options.logLevel = "DEBUG"
    options.clean = "always"
    dataset_name = options.jobStore.split(":")[-1]

    job = Job.wrapJobFn(start_toil, dataset_name)
    with Toil(options) as toil:
        toil.start(job)
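In both __main__ blocks above, the job store locator does double duty as the dataset name: Toil locators take forms such as file:my-dataset or aws:us-east-1:my-dataset, so options.jobStore.split(":")[-1] extracts the final component.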
action="store_true",
default=False)
options = parser.parse_args()
options.logLevel = "DEBUG"
#options.clean = "always"
options.targetTime = 1
if options.cathcode is not None:
options.cathcode = [c.split(".") for c in options.cathcode]
sfam_file = os.path.abspath("cath.h5")
if not os.path.isfile(sfam_file):
store = IOStore.get("aws:us-east-1:molmimic-cath")
store.read_input_file("cath-domain-description-file-small.h5", sfam_file)
with Toil(options) as workflow:
if not workflow.options.restart:
cathFileStoreID = workflow.importFile("file://" + os.path.abspath(sfam_file))
job = Job.wrapJobFn(start_toil, cathFileStoreID, cathcode=options.cathcode,
update_features=options.features, force=options.force)
workflow.start(job)
else:
workflow.restart()
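workflow.importFile returns a file-store ID that jobs can later materialize on a worker. A sketch of the receiving side, assuming a start_toil signature matching the wrapJobFn call above:

def start_toil(job, cathFileStoreID, cathcode=None, update_features=None, force=False):
    # Copy the imported CATH file out of the job store onto this worker's disk.
    cath_path = job.fileStore.readGlobalFile(cathFileStoreID)
    job.log("CATH file available at {}".format(cath_path))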