How to use the pandas.read_excel function in pandas

To help you get started, we’ve selected a few pandas.read_excel examples based on popular ways it is used in public projects.
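At its simplest, pd.read_excel loads one worksheet into a DataFrame. A minimal sketch first (the file name, sheet name and options here are placeholders, not taken from any of the projects below):

import pandas as pd

# Read the first sheet of a workbook into a DataFrame
df = pd.read_excel('data.xlsx')

# Common options: pick a sheet by name, use the first column as the
# index, and skip header rows before the real table starts
df = pd.read_excel('data.xlsx', sheet_name='Sheet1', index_col=0, skiprows=2)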


github asrhou / scMatch / scMatch.py
# Fragment from scMatch.py: refTypeList, keepZeros, savefolder,
# testMethodList, coreNum and SortAnno are defined elsewhere in the module.
import glob
import multiprocessing
import os

import numpy as np
import pandas as pd

if len(refTypeList) == 2:
    print('##########merging annotation data')
    rstFolder = 'annotation_result'
    if keepZeros:
        rstFolder = rstFolder + '_keep_all_genes'
    else:
        rstFolder = rstFolder + '_keep_expressed_genes'
    savefolder = os.path.join(savefolder, rstFolder)
    for testMethod in testMethodList:
        tomergeList = glob.glob(os.path.join(savefolder, "human*%s*.xlsx" % testMethod))
        tomergeList = [i for i in tomergeList if "_Avg" not in i]
        topAnnList = []
        for tomerge in tomergeList:
            # merge human and mouse annotation results; the first three
            # spreadsheet rows form a three-level column MultiIndex
            humanAnn = pd.read_excel(tomerge, header=[0, 1, 2])
            humanAnn.columns = pd.MultiIndex.from_arrays(
                [list(humanAnn.columns.get_level_values(0)),
                 list(humanAnn.columns.get_level_values(2))],
                names=('identifier', 'annotation'))
            mouseAnn = pd.read_excel(tomerge.replace('human_', 'mouse_'), header=[0, 1, 2])
            mouseAnn.columns = pd.MultiIndex.from_arrays(
                [list(mouseAnn.columns.get_level_values(0)),
                 list(mouseAnn.columns.get_level_values(2))],
                names=('identifier', 'annotation'))
            mergedAnn = pd.concat([humanAnn, mouseAnn])

            # sort merged results, then split them into per-cell frames
            # (integer division: np.split needs an int section count)
            mergedAnnList = np.split(mergedAnn, len(mergedAnn.columns) // 2, axis=1)

            # annotate single-cell expression profiles in parallel
            p = multiprocessing.Pool(coreNum)
            resultList = p.map(SortAnno, mergedAnnList)
            p.close()
            p.join()

            mergedAnn = pd.concat([i[0] for i in resultList], axis=1)
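The header=[0, 1, 2] argument used above tells pandas to build a three-level column MultiIndex from the first three rows of the sheet; the snippet then keeps only levels 0 and 2. A stripped-down sketch of the same idea, with a hypothetical workbook name:

import pandas as pd

# the first three rows of the sheet become three column-header levels
ann = pd.read_excel('annotations.xlsx', header=[0, 1, 2])

# keep only the outer and innermost levels, renamed for clarity
ann.columns = pd.MultiIndex.from_arrays(
    [ann.columns.get_level_values(0), ann.columns.get_level_values(2)],
    names=('identifier', 'annotation'))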
github se-curriculum-design-group / 2019-Software-Engineering-Curriculum-Design / EMS / utils / database_utils / add_major_course.py
import os
import re
import math
import pandas as pd
from random import choice, randint, choices
from backstage.models import College, Major, AdmClass, Student,\
    Teacher, ClassRoom, MajorPlan
from scoreManagement.models import Course, Teaching, MajorCourses
from django.db.utils import IntegrityError


base_dir = '../others/'
xls_file = '2016-CS-courses.xlsx'
data_frame = pd.read_excel(os.path.join(base_dir, xls_file))


def get_index():
    print(data_frame.columns)


# '信息科学与技术学院' = College of Information Science and Technology
college = College.objects.get(name='信息科学与技术学院')

cs_courses = []
# spreadsheet columns: '课程代码' = course code, '课程名称' = course name, '学分' = credits
for item1, item2, item3 in zip(data_frame['课程代码'], data_frame['课程名称'], data_frame['学分']):
    try:
        c = Course.objects.filter(
            cno=item1
        )[0]
        cs_courses.append(c)
    except:
        pass  # assumed continuation: skip rows with no matching Course
github apachecn / python_data_analysis_and_mining_action / chapter4 / code.py
import numpy as np
import pandas as pd


# `path` is defined at module level in the original file
def programmer_2():
    datafile = path + '/data/normalization_data.xls'
    data = pd.read_excel(datafile, header=None)  # the sheet has no header row

    # min-max scaling to [0, 1]
    print((data - data.min()) / (data.max() - data.min()))
    # z-score standardization
    print((data - data.mean()) / data.std())
    # decimal scaling: divide by the smallest power of 10 that bounds |x|
    print(data / 10**np.ceil(np.log10(data.abs().max())))
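The three print statements implement the three standard rescalings: min-max scaling maps each value to (x - min) / (max - min); z-score standardization to (x - mean) / std; and decimal scaling divides by 10^ceil(log10(max|x|)), the smallest power of ten that bounds the column, so every value lands in [-1, 1].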
github Finance-Hub / FinanceHub / trackers / GovBonds / Brazil / ntnbvna.py
from datetime import datetime

import pandas as pd
from tqdm import tqdm

from calendars import DayCounts

dc = DayCounts('BUS/252', calendar='anbima')

# BW path
# file_path = r'C:\Users\gamarante\Dropbox\Aulas\Insper - Financas Quantitativas\VNA Raw.xlsx'

# macbook path
# file_path = r'/Users/gusamarante/Dropbox/Aulas/Insper - Financas Quantitativas/VNA Raw.xlsx'

# mac path
file_path = r'/Users/gustavoamarante/Dropbox/Aulas/Insper - Financas Quantitativas/VNA Raw.xlsx'

# each sheet of the workbook is read into its own DataFrame
df_mensal = pd.read_excel(file_path, 'Mensal', index_col=0)
df_diario = pd.read_excel(file_path, 'Diario', index_col=0, na_values=['#N/A N/A'])
df_release = pd.read_excel(file_path, 'Release')
df_release.columns = ['Date', 'IPCA']

df = pd.DataFrame(index=pd.date_range('2003-03-18', 'today', freq='D'),
                  columns=['dia util', 'ultima virada', 'DU desde virada', 'DU entre viradas', 'time fraction',
                           'proj anbima', 'saiu IPCA', 'ultimo IPCA', 'proj IPCA', 'ultimo index', 'VNA'])
df.index.name = 'Date'

df['dia util'] = dc.isbus(df.index)

# "ultima virada" is the most recent 15th of a month on or before each date
for d in tqdm(df.index, 'Filling "ultima virada"'):
    if d.day >= 15:
        df.loc[d, 'ultima virada'] = datetime(d.year, d.month, 15)
    else:
        if d.month - 1 == 0:
            df.loc[d, 'ultima virada'] = datetime(d.year - 1, 12, 15)
        else:
            df.loc[d, 'ultima virada'] = datetime(d.year, d.month - 1, 15)
github psychopy / psychopy / psychopy / data / utils.py
import pandas as pd

from psychopy import logging


# pandasToDictList is defined elsewhere in psychopy/data/utils.py
def _attemptImport(fileName, sep=',', dec='.'):
    """Attempts to import a file with the specified settings and raises
    ConditionsImportError if it fails due to an invalid format

    :param fileName: str
    :param sep: str indicating the separator for cells (',', ';' etc.)
    :param dec: str indicating the decimal point ('.', ',')
    :return: trialList, fieldNames
    """
    if fileName.endswith(('.csv', '.tsv')):
        trialsArr = pd.read_csv(fileName, encoding='utf-8-sig',
                                sep=sep, decimal=dec)
        logging.debug(u"Read csv file with pandas: {}".format(fileName))
    elif fileName.endswith(('.xlsx', '.xls', '.xlsm')):
        trialsArr = pd.read_excel(fileName)
        logging.debug(u"Read Excel file with pandas: {}".format(fileName))
    # then try to convert the array to trialList and fieldNames
    unnamed = trialsArr.columns.to_series().str.contains('^Unnamed: ')
    trialsArr = trialsArr.loc[:, ~unnamed]  # clear unnamed cols
    logging.debug(u"Clearing unnamed columns from {}".format(fileName))
    trialList, fieldNames = pandasToDictList(trialsArr)

    return trialList, fieldNames
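pandas names any header cell it cannot read as 'Unnamed: N' (typically blank trailing columns in a spreadsheet), so stripping columns that match '^Unnamed: ' before the conversion keeps stray empty Excel columns out of the resulting trial list.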
github TwoRavens / TwoRavens / tworaven_apps / data_prep_utils / dataset_doc_maker.py
# Fragment from dataset_doc_maker.py: path, path_ext, dp_static,
# err_resp and ok_resp are defined elsewhere in the module; the
# excerpt begins inside the branch that handles plain csv files.
                print('csv file')
                # csv file
                #
                data = pd.read_csv(path, low_memory=False)

            elif path_ext in [dp_static.EXT_TSV, dp_static.EXT_TAB]:
                print('Tab-delimited')
                # Tab-delimited
                #
                data = pd.read_csv(path, delimiter='\t', low_memory=False)

            elif path_ext in [dp_static.EXT_XLS, dp_static.EXT_XLSX]:
                print('Excel file')
                # Excel file
                #
                data = pd.read_excel(path)
            else:
                return err_resp('File extension not valid: %s' % path_ext)
        except FileNotFoundError as err_obj:
            return err_resp('File not found: %s' % err_obj)
        except pd.errors.ParserError as err_obj:
            return err_resp('Failed to open file: %s' % err_obj)

        if 'd3mIndex' not in data:
            data.insert(0, 'd3mIndex', range(len(data)))

        return ok_resp(data)
github NCBI-Hackathons / Semantic-search-log-analysis-pipeline / 03_Fuzzy_match.py
import requests
import json
import lxml.html as lh
from lxml.html import fromstring
import time
import os
import pandas as pd
from fuzzywuzzy import fuzz, process

# Set working directory
os.chdir('/Users/user/Projects/webDS/_util')

localDir = '03_Fuzzy_match_files/'
dbDir = '_django/loganalysis/'

# Bring in historical file of (somewhat edited) matches
GoldStandard = pd.read_excel('01_Import-transform_files/GoldStandard_master.xlsx')


#%%
# ===========================================================
# 2. FuzzyAutoAdd - When the phrase-match score is 90 or higher,
#    assign without manual checking
# ===========================================================
'''
Isolate terms that might be a minor misspelling or a foreign version of
the term. Some of these matches will be wrong, but overall it is a good
use of time to assign them to the terms they closely resemble. Here we
set the scorer to match whole terms/phrases, and the fuzzy matching
score must be 90 or higher.

# Quick test, if you want - punctuation difference
fuzz.ratio('Testing FuzzyWuzzy', 'Testing FuzzyWuzzy!!')
'''
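As a rough illustration of that rule (the term list and query are invented, not from this project's data), process.extractOne with fuzz.ratio as the scorer returns the best whole-phrase match only when it clears the cutoff:

from fuzzywuzzy import fuzz, process

known_terms = ['influenza vaccine', 'blood pressure', 'diabetes mellitus']

# extractOne returns (best_match, score) if score >= score_cutoff, else None
match = process.extractOne('influensa vacine', known_terms,
                           scorer=fuzz.ratio, score_cutoff=90)
if match is not None:
    term, score = match
    print('auto-assign to %r (score %d)' % (term, score))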
github openelections / openelections-data-ca / src / parse_special_primary_2015.py
import pandas as pd


# prepare_output and output_columns are defined elsewhere in the script
def parse_alameda():
    sovc_xls = 'https://www.acgov.org/rov/elections/20150317/documents/sovc.xls'
    primary = pd.read_excel(sovc_xls, sheet_name='Sheet1')

    # Select only contests of interest and the important columns
    primary = primary.loc[(primary.index > 3) & (primary.index < 385)][
        ['Alameda County', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12']]

    table = prepare_output(primary, 'Alameda', 7,
                           ['Terry Kremin', 'Susan Bonilla', 'Joan Buchanan', 'Michaela M. Hertle', 'Steve Glazer'])
    # stable multi-key sort: least significant key first
    for x in ['candidate', 'district', 'office', 'precinct', 'county']:
        table = table.sort_values(by=x, kind='mergesort')
    table.to_csv(
        '2015/20150317__ca__special__primary__alameda__precinct.csv', header=output_columns, index=False)
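Note that sheet_name replaced the older sheetname keyword (deprecated in pandas 0.21.0 and later removed), and that pd.read_excel accepts a URL directly, so the .xls file is downloaded and parsed in one call.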
github Snooz82 / robotframework-datadriver / src / DataDriver / xls_reader.py
import numpy as np
import pandas as pd


# Method of the xls_reader class; self.file, self.sheet_name and the
# helper methods are set up elsewhere in DataDriver.
def get_data_from_source(self):
    # dtype=str keeps every cell as text; blank cells become '' instead of NaN
    data_frame = pd.read_excel(self.file, sheet_name=self.sheet_name,
                               dtype=str).replace(np.nan, '', regex=True)
    self._analyse_header(list(data_frame))
    for row in data_frame.values.tolist():
        self._read_data_from_table(row)
    return self.data_table
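The dtype=str plus replace(np.nan, '', regex=True) combination forces every cell to text and turns blank cells into empty strings rather than NaN, which suits string-oriented test data. A minimal round trip, assuming openpyxl is installed (the file name is a throwaway):

import numpy as np
import pandas as pd

# write a tiny workbook, then read it back the way the reader above does
pd.DataFrame({'user': ['alice', 'bob'], 'note': ['ok', None]}).to_excel(
    'tmp_demo.xlsx', index=False)

frame = pd.read_excel('tmp_demo.xlsx', dtype=str).replace(np.nan, '', regex=True)
print(frame['note'].tolist())  # ['ok', '']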