How to use the pandas.DataFrame.from_dict function in pandas

To help you get started, we’ve selected a few pandas.DataFrame.from_dict examples, based on popular ways the function is used in public projects.
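Before the project snippets, here is a minimal, self-contained sketch of the two layouts from_dict accepts: by default the dictionary keys become column labels, while orient="index" turns them into row labels (the sample data below is made up for illustration).

import pandas as pd

data = {"a": [1, 2, 3], "b": [4, 5, 6]}

# Default orientation: keys become column labels, values become columns.
by_column = pd.DataFrame.from_dict(data)

# orient="index": keys become row labels; columns= names the resulting columns.
by_row = pd.DataFrame.from_dict(data, orient="index", columns=["x", "y", "z"])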


github AntonelliLab / seqcap_processor / secapr / find_target_contigs.py View on Github
keep_duplicates = True
    counter_sequence_dict = {}
    counter = 0
    for line in reference_fasta:
        if line.startswith('>'):
            old_header = line.replace('>','').strip()
            counter_sequence_dict.setdefault(counter,old_header)
            new_header = '>%i\n' %(counter)
            counter += 1
            new_reference_fasta.write(new_header)
        else:
            new_reference_fasta.write(line)
    new_reference_fasta.close()
    # write the translation dictionary between new numerical identifiers and previous fasta headers to file
    header_info_file = os.path.join(args.output,'reference_fasta_header_info.txt')
    header_info = pd.DataFrame.from_dict(counter_sequence_dict, orient='index')
    header_info.to_csv(header_info_file,sep='\t',header=False,index=True)
    # get the fasta headers from the new formatted reference file
    exons = [seq.id for seq in SeqIO.parse(open(new_fasta, 'r'), 'fasta')]
    sorted_exon_list = list(exons)
    # Get the paths to the contig fasta files for all samples
    fasta_files = glob.glob(os.path.join(args.contigs, '*.fa*'))
    if len(fasta_files) == 0: # multiple subfolders with contigs
        fasta_files_dict = {}
        for subdir in next(os.walk(args.contigs))[1]:
            fasta_files_sub = glob.glob(os.path.join(os.path.join(args.contigs,subdir), '*.fa*'))
            for fasta in fasta_files_sub:
                sample_id = os.path.basename(fasta).split('.fa')[0]
                fasta_files_dict.setdefault(sample_id,[])
                fasta_files_dict[sample_id].append(fasta)
        sample_ids = list(fasta_files_dict.keys())
    else: # single folder with contigs
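The snippet above maps a running integer to the original FASTA header and uses orient="index" so that each dictionary key becomes a row label in the translation table. A minimal sketch of that pattern, with made-up header values standing in for the real ones:

import pandas as pd

# Made-up numeric-ID -> original-header mapping.
counter_sequence_dict = {0: 'contig_A', 1: 'contig_B'}
header_info = pd.DataFrame.from_dict(counter_sequence_dict, orient='index')
# Writes one "<id>\t<header>" row per entry, i.e. the translation table described above.
header_info.to_csv('reference_fasta_header_info.txt', sep='\t', header=False, index=True)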
github oxai / deepsaber / scripts / feature_extraction / features_base.py View on Github
obstacles = obstacles.append(df_new)

                # make second obstacle features
                time.append(duration[i] + time[i])
                duration.append(duration[i])
                width.append(1)
                type.append(type[i])
                if(lineIndex[i] == 1):
                    lineIndex.append(2)
                elif(lineIndex[i] == 2):
                    lineIndex.append(1)
                i = i + 1
                # adding second obstacle
                new_obstacle2 = {'_time': [time[i]], '_lineIndex': [lineIndex[i]], '_type': [type[i]],
                                 '_duration': [duration[i]], '_width': [width[i]]}
                df_new2 = pd.DataFrame.from_dict(new_obstacle2)
                obstacles = obstacles.append(df_new2)
                numObstacles = numObstacles + 1

        # creating our new obstacle if not already made
        if (type[i] == 1 or blockType == 1):
            new_obstacle = {'_time': [time[i]], '_lineIndex': [lineIndex[i]], '_type': [type[i]], '_duration': [duration[i]], '_width': [width[i]]}
            df_new = pd.DataFrame.from_dict(new_obstacle)
            obstacles = obstacles.append(df_new)

    return obstacles # time, lineIndex, type, duration, width
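Here each obstacle is a dict of one-element lists, so from_dict (default orientation) produces a one-row frame whose keys become columns, which is then appended to the growing obstacles frame. A minimal sketch with made-up values; note that DataFrame.append was removed in pandas 2.0, so on current pandas the per-obstacle frames would be collected and concatenated instead:

import pandas as pd

# Made-up stand-ins for time[i], lineIndex[i], type[i], duration[i], width[i].
new_obstacle = {'_time': [4.0], '_lineIndex': [1], '_type': [0],
                '_duration': [2.0], '_width': [1]}
df_new = pd.DataFrame.from_dict(new_obstacle)      # one-row frame, keys become columns

# pandas >= 2.0: concat replaces the removed DataFrame.append.
obstacles = pd.concat([df_new, df_new], ignore_index=True)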
github rsethur / MLOps / models / risk-model / score / score.py View on Github
def run(data):
    try:
        input_df = pd.DataFrame.from_dict(data)
        proba = model.predict_proba(input_df)
        result = {"predict_proba":proba.tolist()}
        return result
    except Exception as e:
        error = str(e)
        return error
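The scoring function simply turns the incoming request payload into a frame before calling the model, one column per feature. A rough sketch of the shape the payload would need, with hypothetical feature names (the real schema depends on the trained model):

import pandas as pd

# Hypothetical request payload: one key per feature, one list entry per sample.
data = {'age': [42, 37], 'income': [52000, 61000]}
input_df = pd.DataFrame.from_dict(data)
# model.predict_proba(input_df) then returns one probability vector per row.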
github MolSSI / QCFractal / qcfractal / interface / client.py View on Github
query: Dict[str, str] = {}
        if collection_type is not None:
            query = {"collection": collection_type.lower()}

        payload = {"meta": {"include": ["name", "collection", "tagline", "visibility", "group", "tags"]}, "data": query}
        response: List[Dict[str, Any]] = self._automodel_request("collection", "get", payload, full_return=False)

        # Rename collection names
        repl_name_map = collections_name_map()
        for item in response:
            item.pop("id", None)
            if item["collection"] in repl_name_map:
                item["collection"] = repl_name_map[item["collection"]]

        df = pd.DataFrame.from_dict(response)
        if not show_hidden:
            df = df[df["visibility"]]
        if group is not None:
            df = df[df["group"].str.lower() == group.lower()]
        if tag is not None:
            if isinstance(tag, str):
                tag = [tag]
            tag = {t.lower() for t in tag}
            df = df[df.apply(lambda x: len({t.lower() for t in x["tags"]} & tag) > 0, axis=1)]

        df.drop(["visibility", "group", "tags"], axis=1, inplace=True)
        if not aslist:
            df.set_index(["collection", "name"], inplace=True)
            df.sort_index(inplace=True)
            return df
        else:
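In this (truncated) excerpt, the REST response is a list of record dicts rather than a dict; the default orientation of from_dict forwards it to the DataFrame constructor, so each record becomes a row and each key a column. A small sketch with invented collection records:

import pandas as pd

# Invented records mimicking the name/collection/visibility/... payload above.
response = [
    {'name': 'S22', 'collection': 'Dataset', 'tagline': '', 'visibility': True,
     'group': 'default', 'tags': []},
    {'name': 'ANI-1', 'collection': 'Dataset', 'tagline': '', 'visibility': False,
     'group': 'ml', 'tags': ['ml']},
]
df = pd.DataFrame.from_dict(response)    # one row per record, keys become columns
df = df[df['visibility']]                # keep only visible collections
df.set_index(['collection', 'name'], inplace=True)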
github KienVu2368 / tabint / tabint / utils.py View on Github
def feature_value_to_df(features, values):
    df_dict = {}
    for feature, value in zip(features, values): df_dict[feature] = value
    df = pd.DataFrame.from_dict(df_dict)
    return df
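feature_value_to_df pairs feature names with their values and hands the resulting dict to from_dict, so every feature becomes a column. A minimal sketch of the same call with hypothetical features; each value needs to be list-like, since a dict of plain scalars would require an explicit index:

import pandas as pd

# Hypothetical feature names paired with per-row value lists.
features = ['age', 'income']
values = [[42, 37], [52000, 61000]]
df = pd.DataFrame.from_dict(dict(zip(features, values)))   # what the helper builds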
github luca-fiorito-11 / sandy / sandy / formats / endf6.py View on Github
condition = reduce(lambda x,y: np.logical_or(x, y), conditions)
            tape = tape[condition]
        DictLpc =  {}
        for ix,text in tape.TEXT.iteritems():
            X = self.read_section(*ix)
            if "LPC" not in X: continue
            if X["LPC"]["INT"] != [2]:
                if verbose:
                    logging.warn("found non-linlin interpolation, skip angular distr. for MAT{}/MF{}/MT{}".format(*ix))
                continue
            for e,v in X["LPC"]["E"].items():
                DictLpc.update({(X["MAT"], X["MT"],e) : pd.Series([1]+v["COEFF"])})
        if not DictLpc:
            logging.warn("no angular distribution in Legendre expansion was found")
            return pd.DataFrame()
        frame = pd.DataFrame.from_dict(DictLpc, orient="index")
        return Lpc(frame)
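DictLpc maps (MAT, MT, energy) tuples to Series of Legendre coefficients of varying length; with orient="index" each key becomes a row and the Series are aligned, with missing coefficients filled as NaN. A sketch with invented coefficients:

import pandas as pd

# Invented Legendre coefficients keyed by (MAT, MT, energy) tuples.
DictLpc = {
    (9228, 2, 1e-5): pd.Series([1, 0.02, 0.001]),
    (9228, 2, 2e7):  pd.Series([1, 0.35, 0.12, 0.04]),
}
frame = pd.DataFrame.from_dict(DictLpc, orient='index')
# The shorter Series is padded with NaN in the trailing coefficient column.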
github SohierDane / BigQuery_Helper / bq_helper.py View on Github
def table_schema(self, table_name):
        """
        Get the schema for a specific table from a dataset.
        Unrolls nested field names into the format that can be copied
        directly into queries. For example, for the `github.commits` table,
        this will return `committer.name`.

        This is a very different return signature than BigQuery's table.schema.
        """
        self.__fetch_table(table_name)
        raw_schema = self.tables[table_name].schema
        schema = pd.DataFrame.from_dict([x.to_api_repr() for x in raw_schema])
        # the api_repr only has the fields column for tables with nested data
        if 'fields' in schema.columns:
            schema = self.__unpack_all_schema_fields(schema)
        # Set the column order
        schema = schema[['name', 'type', 'mode', 'description']]
        return schema
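Here raw_schema is a list of per-field dicts (the api_repr of each SchemaField), and from_dict with the default orientation turns each dict into a row; fields without nested children simply get NaN in the optional "fields" column. A sketch with an invented two-field schema:

import pandas as pd

# Invented api_repr-style field descriptions; only the RECORD field has nested children.
raw_schema = [
    {'name': 'committer', 'type': 'RECORD', 'mode': 'NULLABLE', 'description': '',
     'fields': [{'name': 'name', 'type': 'STRING'}]},
    {'name': 'message', 'type': 'STRING', 'mode': 'NULLABLE', 'description': ''},
]
schema = pd.DataFrame.from_dict(raw_schema)   # one row per field
# The helper above then unrolls the 'fields' column into dotted names like committer.name.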
github exa-analytics / exatomic / exatomic / algorithms / neighbors.py View on Github
# Identify the nearest molecules
            nearest_atoms = uu.atom_two[uu.atom_two['atom0'].isin(source_atom_idxs) |
                                        uu.atom_two['atom1'].isin(source_atom_idxs)].sort_values('dr')[['atom0', 'atom1']].copy()
            nearest_atoms['molecule0'] = nearest_atoms['atom0'].map(uu.atom['molecule'])
            nearest_atoms['molecule1'] = nearest_atoms['atom1'].map(uu.atom['molecule'])
            nearest_molecules = nearest_atoms[['molecule0', 'molecule1']].stack()
            nearest_molecules = nearest_molecules[~nearest_molecules.isin(source_molecule_idxs)].drop_duplicates(keep='first')
            # Build the appropriate universes
            for nn in sizes:
                atom1 = uu.atom.loc[uu.atom['molecule'].isin(nearest_molecules.iloc[:nn].tolist()+source_molecule_idxs),
                                   ['symbol', 'x', 'y', 'z']]
                adxs, x, y, z, prj = _worker(atom1.index.values.astype(int),
                                             atom1['x'].values.astype(float),
                                             atom1['y'].values.astype(float),
                                             atom1['z'].values.astype(float), a)
                patom = pd.DataFrame.from_dict({'atom': adxs, 'x': x, 'y': y, 'z': z, 'prj': prj})
                patom['frame'] = patom['atom'].map(uu.atom['frame'])
                patom['symbol'] = patom['atom'].map(uu.atom['symbol'])
                sliced_u = Universe(atom=patom)
                sliced_u.compute_atom_two(dmax=a)
                sliced_u.compute_molecule()
                source_adxs1 = sliced_u.atom[(sliced_u.atom['prj'] == 13) & sliced_u.atom['atom'].isin(source_atom_idxs)].index
                source_mdxs1 = sliced_u.atom.loc[source_adxs1, 'molecule'].unique().tolist()
                nearest_atoms1 = sliced_u.atom_two[sliced_u.atom_two['atom0'].isin(source_adxs1) |
                                                   sliced_u.atom_two['atom1'].isin(source_adxs1)].sort_values('dr')[['atom0', 'atom1']].copy()
                nearest_atoms1['molecule0'] = nearest_atoms1['atom0'].map(sliced_u.atom['molecule'])
                nearest_atoms1['molecule1'] = nearest_atoms1['atom1'].map(sliced_u.atom['molecule'])
                nearest_molecules1 = nearest_atoms1[['molecule0', 'molecule1']].stack()
                nearest_molecules1 = nearest_molecules1[~nearest_molecules1.isin(source_mdxs1)].drop_duplicates(keep='first')
                # It's fine to overwrite atom1 above since the uu.atom slice is not necessarily clustered
                atom1 = sliced_u.atom.loc[sliced_u.atom['molecule'].isin(nearest_molecules1.iloc[:nn].tolist()+source_mdxs1)].copy()
                dct[nn].append(atom1)
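The worker returns equally sized arrays for atom indices, coordinates, and projection labels, and from_dict with the default orientation turns each array into a column of the projected-atom table. A stripped-down sketch with made-up arrays:

import numpy as np
import pandas as pd

# Made-up per-atom arrays standing in for the _worker output above.
adxs = np.array([0, 1, 2])
x, y, z = np.zeros(3), np.ones(3), np.full(3, 2.0)
prj = np.array([13, 13, 13])
patom = pd.DataFrame.from_dict({'atom': adxs, 'x': x, 'y': y, 'z': z, 'prj': prj})
# Further columns (frame, symbol) are then mapped on via patom['atom'].map(...).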
github tuetschek / ratpred / experiments / input / convert_oneperline_data.py View on Github
def convert(args):
    src = lines_to_list(args.src_file)
    if args.das:
        src = [DA.parse(da_text).to_cambridge_da_string() for da_text in src]
    ref = lines_to_list(args.ref_file)
    columns = ['mr', 'orig_ref']
    df = pd.DataFrame.from_dict({'mr': src, 'orig_ref': ref})

    if args.system_output:
        sys = lines_to_list(args.system_output)
        df['system_ref'] = sys
        columns.append('system_ref')

    if args.score:
        score = [float(score) for score in lines_to_list(args.score)]
        df['quality'] = score
        columns.append('quality')

    df.to_csv(args.out_file, columns=columns, sep=b"\t", index=False, encoding='UTF-8')
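The converter pairs the source and reference lists into a two-column frame via from_dict, appends optional columns, and writes a TSV. Note that sep=b"\t" is a Python 2-era detail; current pandas on Python 3 expects a str separator. A minimal sketch with made-up rows and a hypothetical output path:

import pandas as pd

# Made-up MRs and references standing in for the files read above.
src = ['inform(name=X)', 'request(area)']
ref = ['X is a nice place.', 'Which area do you want?']
df = pd.DataFrame.from_dict({'mr': src, 'orig_ref': ref})
df.to_csv('out.tsv', columns=['mr', 'orig_ref'], sep='\t', index=False, encoding='UTF-8')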
github neuropsychology / NeuroKit / neurokit2 / complexity / complexity_optimize.py View on Github
signal_entropy = _complexity_optimize_get_differential(signal_embedded, k=1)

            # calculate average of surrogates entropy
            for i in range(surrogate_iter):
                surrogate, iterations, rmsd = _complexity_optimize_iaaft(signal)
                surrogate_embedded = complexity_embedding(surrogate, delay=tau, dimension=dimension)
                surrogate_entropy = _complexity_optimize_get_differential(surrogate_embedded, k=1)
                surrogate_list.append(surrogate_entropy)
                surrogate_entropy_average = sum(surrogate_list) / len(surrogate_list)

            # entropy ratio for each set of d and tau
            entropy_ratio = signal_entropy / surrogate_entropy_average + (dimension * np.log(N)) / N
            optimal[dimension].append(entropy_ratio)

    # optimal dimension and tau is where entropy_ratio is minimum
    optimal_df = pd.DataFrame.from_dict(optimal)
    optimal_delay, optimal_dimension = np.unravel_index(np.nanargmin(optimal_df.values), optimal_df.shape)

    optimal_delay = optimal_delay + 1  # accounts for zero indexing

    return optimal_dimension, optimal_delay
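optimal is a dict keyed by embedding dimension with one entropy ratio per delay, so from_dict yields a delays-by-dimensions table, and np.nanargmin plus unravel_index locate the minimizing cell. A condensed sketch with invented ratios:

import numpy as np
import pandas as pd

# Invented entropy ratios: one column per dimension, one row per delay.
optimal = {2: [0.91, 0.88, 0.93], 3: [0.87, 0.85, 0.90]}
optimal_df = pd.DataFrame.from_dict(optimal)
delay_idx, dim_idx = np.unravel_index(np.nanargmin(optimal_df.values), optimal_df.shape)
optimal_delay = delay_idx + 1                    # delays are 1-based in the code above
optimal_dimension = optimal_df.columns[dim_idx]  # actual dimension label, here 3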