How to use the toolz.get_in function in toolz

To help you get started, we’ve selected a few toolz.get_in examples, based on popular ways the function is used in public projects.
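
Before the project examples, here is what the function does: toolz.get_in(keys, coll, default=None) walks a nested structure along the given sequence of keys (or sequence indexes) and returns the value at the end of the path, or the default if any step is missing, instead of raising KeyError. A minimal sketch with an illustrative record:

import toolz as tz

record = {"rgnames": {"sample": "NA12878"}, "files": ["a.fq", "b.fq"]}
print(tz.get_in(["rgnames", "sample"], record))        # NA12878
print(tz.get_in(["files", 1], record))                 # b.fq -- sequence indexes traverse too
print(tz.get_in(["rgnames", "lane"], record, "none"))  # none -- missing path returns the default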

github bcbio / bcbio-nextgen / bcbio / structural / cn_mops.py
def run(items, background=None):
    """Detect copy number variations from batched set of samples using cn.mops.
    """
    if not background: background = []
    names = [tz.get_in(["rgnames", "sample"], x) for x in items + background]
    work_bams = [x["align_bam"] for x in items + background]
    if len(items + background) < 2:
        raise ValueError("cn.mops only works on batches with multiple samples")
    data = items[0]
    work_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "structural", names[0],
                                               "cn_mops"))
    parallel = {"type": "local", "cores": data["config"]["algorithm"].get("num_cores", 1),
                "progs": ["delly"]}
    with pysam.Samfile(work_bams[0], "rb") as pysam_work_bam:
        chroms = [None] if _get_regional_bed_file(items[0]) else pysam_work_bam.references
        out_files = run_multicore(_run_on_chrom, [(chrom, work_bams, names, work_dir, items)
                                                  for chrom in chroms],
                                  data["config"], parallel)
    out_file = _combine_out_files(out_files, work_dir, data)
    out = []
    for data in items:
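
In this excerpt, get_in pulls the nested sample name out of every item in a batch inside a single list comprehension, with no per-item key guarding. A stripped-down sketch of the same pattern, using made-up sample records:

import toolz as tz

items = [{"rgnames": {"sample": "tumor1"}}, {"rgnames": {"sample": "normal1"}}]
names = [tz.get_in(["rgnames", "sample"], x) for x in items]
# names == ["tumor1", "normal1"]; a record missing "rgnames" would yield None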
github bcbio / bcbio-nextgen / bcbio / variation / vcfanno.py
def is_human(data, builds=None):
    """Check if human, optionally with build number, search by name or extra GL contigs.
    """
    def has_build37_contigs(data):
        for contig in ref.file_contigs(dd.get_ref_file(data)):
            if contig.name.startswith("GL") or contig.name.find("_gl") >= 0:
                if contig.name in naming.GMAP["hg19"] or contig.name in naming.GMAP["GRCh37"]:
                    return True
        return False
    if not builds and tz.get_in(["genome_resources", "aliases", "human"], data):
        return True
    if not builds or "37" in builds:
        target_builds = ["hg19", "GRCh37"]
        if any([dd.get_genome_build(data).startswith(b) for b in target_builds]):
            return True
        elif has_build37_contigs(data):
            return True
    if not builds or "38" in builds:
        target_builds = ["hg38"]
        if any([dd.get_genome_build(data).startswith(b) for b in target_builds]):
            return True
    return False
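
Because get_in returns None rather than raising when a path is absent, the genome_resources lookup above doubles as a truthiness test. A short sketch with an assumed configuration shape:

import toolz as tz

data = {"genome_resources": {"aliases": {"human": True}}}
if tz.get_in(["genome_resources", "aliases", "human"], data):
    print("treat as human")  # an absent or falsy alias falls through silently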
github bcbio / bcbio-nextgen / bcbio / cwl / inspect.py
def _compare_dicts(self, orig, new, ns):
        out = {}
        for key, val in new.items():
            nskey = ns + [key]
            orig_val = tz.get_in([key], orig)
            if isinstance(val, dict) and isinstance(orig_val, dict):
                for nkey, nval in self._compare_dicts(orig_val or {}, val or {}, nskey).items():
                    out = self._merge(out, {nkey: nval})
            elif val != orig_val:
                out = tz.update_in(out, nskey, lambda x: copy.deepcopy(val))
        return out
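
The recursive diff above pairs get_in (a safe read of the original value) with toolz.update_in (a write at a nested path that creates intermediate dicts as needed). A self-contained sketch of that read/write pairing on toy dictionaries:

import copy
import toolz as tz

orig = {"a": {"b": 1}}
new_val = 2
out = {}
if tz.get_in(["a", "b"], orig) != new_val:
    out = tz.update_in(out, ["a", "b"], lambda _: copy.deepcopy(new_val))
# out == {"a": {"b": 2}}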
github bcbio / bcbio-nextgen / bcbio / variation / genotype.py
def vc_output_record(samples):
    """Prepare output record from variant calling to feed into downstream analysis.

    Prep work handles reformatting so we return generated dictionaries.

    For any shared keys that are calculated only once for a batch, like variant calls
    for the batch, we assign to every sample.
    """
    shared_keys = [["vrn_file"], ["validate", "summary"],
                   ["validate", "tp"], ["validate", "fp"], ["validate", "fn"]]
    raw = cwlutils.samples_to_records([utils.to_single_data(x) for x in samples])
    shared = {}
    for key in shared_keys:
        cur = list(set([x for x in [tz.get_in(key, d) for d in raw] if x]))
        if len(cur) > 0:
            assert len(cur) == 1, (key, cur)
            shared[tuple(key)] = cur[0]
        else:
            shared[tuple(key)] = None
    out = []
    for d in raw:
        for key, val in shared.items():
            d = tz.update_in(d, key, lambda x: val)
        out.append([d])
    return out
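
The shared-key logic above leans on get_in returning None for records that lack a path, so the comprehension can filter out missing values before asserting there is exactly one shared value. A compact sketch with fabricated records:

import toolz as tz

raw = [{"vrn_file": "batch.vcf"}, {"vrn_file": "batch.vcf"}, {}]
cur = list(set(x for x in (tz.get_in(["vrn_file"], d) for d in raw) if x))
# cur == ["batch.vcf"]: one value shared across the batch, missing entries dropped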
github bcbio / bcbio-nextgen / bcbio / variation / ploidy.py
def _configured_genders(items):
    return set([str(tz.get_in(["metadata", "sex"], data, "")).lower() for data in items])
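
This one-liner shows the third positional argument: a default of "" means samples without metadata contribute an empty string to the set rather than None (which str() would render as "None"). A sketch with invented items:

import toolz as tz

items = [{"metadata": {"sex": "Female"}}, {}]
genders = set(str(tz.get_in(["metadata", "sex"], data, "")).lower() for data in items)
# genders == {"female", ""} -- the default keeps a stray "none" out of the set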
github bcbio / bcbio-nextgen / bcbio / qc / samtools.py
def _get_stats_files(data, out_dir=None):
    """Retrieve stats files from pre-existing dictionary or filesystem.
    """
    if not out_dir:
        out_dir = utils.safe_makedir(os.path.join(dd.get_work_dir(data),
                                                  "qc", dd.get_sample_name(data), "samtools"))
    stats_file = tz.get_in(["depth", "samtools", "stats"], data)
    idxstats_file = tz.get_in(["depth", "samtools", "idxstats"], data)
    if not stats_file:
        stats_file = os.path.join(out_dir, "%s.txt" % dd.get_sample_name(data))
    if not idxstats_file:
        idxstats_file = os.path.join(out_dir, "%s-idxstats.txt" % dd.get_sample_name(data))
    return stats_file, idxstats_file
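
Here get_in implements a lookup-then-fallback: use the precomputed entry when it exists in the nested dictionary, otherwise derive a location on disk. A sketch of the pattern, with illustrative paths:

import os
import toolz as tz

data = {"depth": {"samtools": {"stats": "/qc/sample1/samtools/sample1.txt"}}}
stats_file = tz.get_in(["depth", "samtools", "stats"], data)
if not stats_file:  # no precomputed entry: fall back to a derived filename
    stats_file = os.path.join("/qc/sample1/samtools", "sample1.txt")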
github bcbio / bcbio-nextgen / bcbio / ngsalign / alignprep.py
        del cur_data["align_split"]
        for x in mgroup[1:]:
            cur_data["combine"][file_key]["extras"].append(x[file_key])
        ready_merge.append([cur_data])
        cur_hla = None
        for d in mgroup:
            hla_files = tz.get_in(["hla", "fastq"], d)
            if hla_files:
                if not cur_hla:
                    cur_hla = {"rgnames": {"sample": dd.get_sample_name(cur_data)},
                               "config": cur_data["config"], "dirs": cur_data["dirs"],
                               "hla": {"fastq": []}}
                cur_hla["hla"]["fastq"].append(hla_files)
        if cur_hla:
            hla_merges.append([cur_hla])
    if not tz.get_in(["config", "algorithm", "kraken"], data):
        # kraken requires fasta filenames from data['files'] as input.
        # We don't want to remove those files if kraken qc is required.
        _save_fastq_space(samples)
    merged = run_parallel("delayed_bam_merge", ready_merge)
    hla_merge_raw = run_parallel("merge_split_alignments", hla_merges)
    hla_merges = {}
    for hla_merge in [x[0] for x in hla_merge_raw]:
        hla_merges[dd.get_sample_name(hla_merge)] = tz.get_in(["hla", "fastq"], hla_merge)

    # Add stable 'align_bam' target to use for retrieving raw alignment
    out = []
    for data in [x[0] for x in merged + ready]:
        if data.get("work_bam"):
            data["align_bam"] = data["work_bam"]
        if dd.get_sample_name(data) in hla_merges:
            data["hla"]["fastq"] = hla_merges[dd.get_sample_name(data)]
github bcbio / bcbio-nextgen-vm / bcbiovm / provider / aws / icel.py
"m3.medium")
            tree["Mappings"]["AWSNATAMI"]["us-east-1"]["AMI"] = "ami-184dc970"
        resources = tree['Resources']

        # We don't need the demo Lustre client instance.
        for section in ('ClientInstanceProfile', 'ClientLaunchConfig',
                        'ClientNodes', 'ClientRole'):
            resources.pop(section)

        for item in resources['BasePolicy']['Properties']['Roles'][:]:
            if item['Ref'] == 'ClientRole':
                resources['BasePolicy']['Properties']['Roles'].remove(item)

        for section_name in ['MDS', 'MDS', 'MGS']:
            section = resources['{}LaunchConfig'.format(section_name)]
            cf_params = toolz.get_in(['Metadata', 'AWS::CloudFormation::Init',
                                      'config', 'files', '/etc/loci.conf',
                                      'content', 'Fn::Join'], section)[1]

            index, _ = self._get_index(cf_params, 'OssCount:')
            cf_params[index + 1] = oss_count

            index, _ = self._get_index(cf_params, 'OstVolumeCount:')
            cf_params[index + 1] = ost_vol_count

            index, _ = self._get_index(cf_params, 'OstVolumeSize:')
            cf_params[index + 1] = ost_vol_size

        resources['OSSNodes']['Properties']['DesiredCapacity'] = oss_count
        resources['OSSNodes']['Properties']['MaxSize'] = oss_count
        resources['OSSNodes']['Properties']['MinSize'] = oss_count
        resources['OssWaitCondition']['Properties']['Count'] = oss_count
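
get_in is at its best on deeply nested parsed JSON such as this CloudFormation template, where the equivalent chain of subscripts would be long and fragile. A sketch against a pared-down template fragment:

import toolz

section = {"Metadata": {"AWS::CloudFormation::Init": {"config": {"files": {
    "/etc/loci.conf": {"content": {"Fn::Join": ["", ["OssCount:", "4"]]}}}}}}
cf_params = toolz.get_in(["Metadata", "AWS::CloudFormation::Init", "config",
                          "files", "/etc/loci.conf", "content", "Fn::Join"], section)[1]
# cf_params == ["OssCount:", "4"]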
github rackerlabs / otter / otter / json_schema / group_schemas.py
def validate_launch_config_servicenet(lc):
    """
    Validate that if CLBs are provided, ServiceNet is also provided.
    """
    clb = any([lb.get('type', 'CloudLoadBalancer') == 'CloudLoadBalancer'
               for lb in get_in(('args', 'loadBalancers'), lc, default=())])
    networks = get_in(('args', 'server', 'networks'), lc, default=None)

    if (clb and
            networks is not None and
            {'uuid': '11111111-1111-1111-1111-111111111111'} not in networks):
        raise ValidationError("ServiceNet network must be present if one or "
                              "more Cloud Load Balancers are configured.")
github bcbio / bcbio-nextgen / bcbio / variation / gatk.py
def _joint_calling(items):
    """Determine if this call feeds downstream into joint calls.
    """
    jointcaller = tz.get_in(("config", "algorithm", "jointcaller"), items[0])
    if jointcaller:
        assert len(items) == 1, "Can only do joint calling preparation with GATK with single samples"
        assert tz.get_in(("metadata", "batch"), items[0]) is not None, \
            "Joint calling requires batched samples, %s has no metadata batch." % dd.get_sample_name(items[0])
    return jointcaller
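
As in the previous example, tuple paths behave exactly like lists; here get_in probes per-sample configuration to decide whether joint calling applies. A final sketch, with an assumed sample dictionary:

import toolz as tz

item = {"config": {"algorithm": {"jointcaller": "gatk-haplotype-joint"}},
        "metadata": {"batch": "b1"}}
if tz.get_in(("config", "algorithm", "jointcaller"), item):
    assert tz.get_in(("metadata", "batch"), item) is not None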