How to use the gseapy.parser module in gseapy

To help you get started, we’ve selected a few gseapy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pathwayforte / pathway-forte / src / pathway_forte / prediction / binary.py View on Github external
# Check size of df
        assert previous_df_shape[0] == enrichment_score.shape[0] + len(drop_indices), 'Problem removing random rows'
        assert previous_df_shape[1] == enrichment_score.shape[1], 'Columns should have not changed after removing rows'

    # Transpose dataFrame to arrange columns as pathways and rows as genes
    enrichment_score_df = enrichment_score.transpose()

    # Set column index to the first row in the dataframe
    enrichment_score_df.columns = enrichment_score_df.iloc[0]

    # Remove the first row because it is already set as column index
    enrichment_score_df = enrichment_score_df.drop("Term|NES")

    # Get class labels
    _, _, class_vector = gseapy.parser.gsea_cls_parser(classes_file)

    class_labels = []

    for label in class_vector:
        if label == 'Normal':
            class_labels.append(0)
        elif label == 'Tumor':
            class_labels.append(1)

    # Get list of pathways as features
    feature_cols = list(enrichment_score_df.columns.values)

    # split dataset into features and target variable (i.e., normal vs tumor sample labels)
    pathways = enrichment_score_df[feature_cols]  # Features
    pathways.reset_index(drop=True, inplace=True)
github iseekwonderful / PyPathway / pypathway / analysis / gsea / __init__.py View on Github external
def parse_class_vector(path):
    """Parse the class file located at ``path``.

    Hands ``path`` to ``gp.parser.gsea_cls_parser`` and returns whatever
    that call produces, unchanged.

    NOTE(review): ``gp`` is presumably the ``gseapy`` package imported under
    an alias elsewhere in this module -- confirm against the file's imports.

    :param path: location of the class file, passed through as-is
    :return: the result of ``gp.parser.gsea_cls_parser(path)``
    """
    parsed_classes = gp.parser.gsea_cls_parser(path)
    return parsed_classes
github pathwayforte / pathway-forte / src / pathway_forte / pathway_enrichment / functional_class.py View on Github external
def filter_gene_exp_data(expression_data: pd.DataFrame, gmt_file: str):
    """Filter gene expression data file to include only gene names which are found in the gene set files.

    :param expression_data: gene expression values for samples
    :param gmt_file: .gmt file containing gene sets
    :return: Filtered gene expression data with genes with no correspondences in gene sets removed
    :rtype: pandas.core.frame.DataFrame kegg_xml_parser.py
    """
    filtered_expression_data = expression_data.copy()

    # Gene universe from gene set
    gene_sets = gseapy.parser.gsea_gmt_parser(gmt_file, max_size=40000)

    # All the genes in gene set files
    gene_universe = set(itt.chain(*gene_sets.values()))

    genes_to_remove = [
        gene
        for gene in filtered_expression_data.index.values
        if gene not in gene_universe
    ]
    # Genes to be removed because they are not present in the gene sets
    counter = len(genes_to_remove)

    logger.info(f'Expression data has {len(filtered_expression_data.index.values)}')
    logger.info(f'Gene universe has {len(gene_universe)}')
    logger.info(f'{counter} were removed in expression data')
    logger.info(