How to use the openpyxl.load_workbook function in openpyxl

To help you get started, we’ve selected a few openpyxl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github abenassi / xlseries / tests / strategies / clean / View on Github external
# NOTE(review): scraped fragment of an xlseries test method — the enclosing
# `def` line (and the setup of `ws` and `wb`) was lost in extraction, so the
# mixed indentation below is as captured and the block is not runnable alone.
# Layout parameters describing how the time series sits in the worksheet.
params = {"alignment": "vertical",
                  "time_alignment": 0,
                  "continuity": True,
                  "blank_rows": False,
                  "time_header_coord": "A1",
                  "data_starts": 2,
                  "data_ends": None,
                  "frequency": "M",
                  "missings": True,
                  "missing_value": None,
                  "time_multicolumn": False,
                  "time_composed": False}

        # Clean the time index of worksheet `ws` using the params above; the
        # assertion below pins the returned value at 256 (presumably the last
        # data row — confirm against xlseries' clean_time_index docs).
        end = CleanSingleColumn().clean_time_index(ws, params)

        # Expected output workbook shipped with the test fixtures.
        wb_exp = load_workbook(
            os.path.join(abs_path("expected"), "test_case1.xlsx"))

        # The cleaned workbook must match the expected one cell-by-cell.
        self.assertTrue(compare_cells(wb, wb_exp))
        self.assertEqual(end, 256)
github dataiku / dataiku-contrib / excel-importer / python-runnables / macro-excel-importer / View on Github external
# Actions performed
        # NOTE(review): scraped fragment of a Dataiku macro — the enclosing
        # method header and the definitions of `files_list`, `folder_path`,
        # `datasets_in_project` and `overwrite` are not visible here, and the
        # snippet cuts off mid-`if` at the end. Not runnable as captured.
        actions_performed = dict()  # maps dataset title -> action taken
        num_files = len(files_list)

        update_time = time.time()

        for file_index, my_file in enumerate(files_list):
            ## Get file path
            file_path = os.path.join(folder_path, my_file)

            ## Get Excel file and load in a pandas dataframe
            sheets_names = pd.ExcelFile(file_path).sheet_names
            for sheet in sheets_names:
                ### Rename sheets by "file_sheet"
                # NOTE(review): the workbook is re-parsed once per sheet;
                # hoisting load_workbook above this inner loop would avoid
                # repeated loading of the same file.
                ss = openpyxl.load_workbook(file_path)
                ss_sheet = ss.get_sheet_by_name(sheet)  # deprecated; ss[sheet] in openpyxl >= 2.4
                title = ss_sheet.title

                # Prefix the sheet title with the file name (sans extension)
                # when the file name is not already part of the title.
                if not my_file.split(".")[0] in title:
                    title = '_'.join((my_file.split(".")[0] + "_" + sheet).split())

                # Normalise the title: collapse whitespace to underscores and
                # strip parentheses.
                title = '_'.join(title.split())
                title = title.replace(')', '')
                title = title.replace('(', '')

                create_dataset = True
                if title in datasets_in_project:
                    if overwrite:
                        actions_performed[title] = "replaced"
github ScienceStacks / SciSheets / mysite / scisheets / plugins / View on Github external
def _importExcelToDataframe(filepath, worksheet=None):
  """Reads the excel file into a dataframe.

  :param str filepath: full path to the Excel file
  :param str worksheet: worksheet to import. Default is first.
  :return pandas.DataFrame:
  :raises IOError, ValueError:

  NOTE(review): the extracted snippet had lost the triple quotes around this
  docstring (restored here), and the body is truncated — the DataFrame
  construction after the header-normalising loop is missing. The use of
  `unicode` below means this is Python 2 code.
  """
  wb = openpyxl.load_workbook(filename=filepath, read_only=True)
  # Default to the first worksheet when none is requested.
  if worksheet is None:
    worksheet = wb.sheetnames[0]
  if not worksheet in wb.sheetnames:
    raise ValueError("Worksheet %s not found in file %s"  \
        % (worksheet, filepath))
  ws = wb[worksheet]
  data = {}
  # Drop empty cells row-by-row; note this shifts remaining values left, so
  # rows with gaps no longer line up with the header columns.
  rows = [ [cell.value for cell in row if cell.value is not None]  \
           for row in ws.rows ]
  names = []
  # First row holds the column headers; normalise each to plain ASCII with
  # spaces removed.
  for name in rows[0]:
    if not (isinstance(name, str) or isinstance(name, unicode)):
      raise ValueError("Invalid column header in %s" % filepath)
    uni = unicode(name)
    name = unicodedata.normalize('NFC', uni).encode('ascii', 'ignore')
    name = name.replace(' ', '')
github illacceptanything / illacceptanything / code / View on Github external
def export_task1(filename):
    """Build Task-1 result rows for `filename` against Output.xlsx.

    NOTE(review): truncated snippet — `output_bins` is defined elsewhere, and
    the code that writes `_data` into the worksheet (and saves the workbook)
    is missing from this capture.
    """
    sheet = "Task-1"
    wb = openpyxl.load_workbook("Output.xlsx")
    ws = wb.get_sheet_by_name(sheet)  # deprecated; wb[sheet] in openpyxl >= 2.4

    # One row per entry in the 365-day bins: a "<file>_day<N>" label plus the
    # first four values of that entry's 'B' series.
    for i, (key, value) in enumerate(output_bins['365day']['days'].items()):
        data = value['B']
        _data = [filename+'_day'+str(i+1), data[0], data[1], data[2], data[3]]
github regro / regolith / regolith / builders / View on Github external
def render_template1(self, person_info, ppl_tab1, ppl_tab3, ppl_tab4, ppl_tab5, **kwargs):
        """Render the nsf template.

        NOTE(review): this snippet was mangled during extraction — the lines
        of the form `ws, ppl_tabN, start_row=..., template_cell_style=style`
        below are orphaned argument lists whose enclosing function calls (and
        the save call that `), "{}_nsf.xlsx"...` belonged to) were lost.
        Not runnable as captured.
        """
        template = self.template
        wb = openpyxl.load_workbook(template)
        ws = wb.worksheets[0]
        # Cell A17's style is reused as the template style for inserted rows.
        style = copy_cell_style(ws['A17'])
            ws, ppl_tab5, start_row=44, template_cell_style=style
            ws, ppl_tab4, start_row=37, template_cell_style=style
            ws, ppl_tab3, start_row=30, template_cell_style=style
            ws, ppl_tab1, start_row=17, template_cell_style=style
        ), "{}_nsf.xlsx".format(person_info["_id"])))
        return locals()
github ebmdatalab / openprescribing / openprescribing / frontend / management / commands / View on Github external
def add_bnf_codes(source_directory):
    from openpyxl import load_workbook
    # 113.831 rows in the spreadsheet
    wb = load_workbook(
            source_directory, "Converted_DRUG_SNOMED_BNF.xlsx"))
    rows = wb.get_active_sheet().rows
    with connection.cursor() as cursor:
        for row in rows[1:]:  # skip header
            bnf_code = row[0].value
            # atc_code = row[1].value
            snomed_code = row[4].value
            sql = "UPDATE dmd_product SET BNF_CODE = %s WHERE DMDID = %s "
            cursor.execute(sql.lower(), [bnf_code, snomed_code])
            rowcount = cursor.rowcount
            if not rowcount:
                print "When adding BNF codes, could not find", snomed_code
github gil9red / SimplePyScripts / office__excel__openpyxl__xlwt / get_sheets / View on Github external
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'

import openpyxl

# Load the workbook and list its worksheets by name.
wb = openpyxl.load_workbook('excel.xlsx')

# `wb.sheetnames` / `wb[name]` replace `get_sheet_names()` /
# `get_sheet_by_name()`, which were deprecated in openpyxl 2.4 and removed
# in later releases.
print(wb.sheetnames)  # ['Sheet', 'auto', 'Students', 'Students1']

for name in wb.sheetnames:
    ws = wb[name]
    print(name, ws)
github onkursen / predicting-terrorist-attacks / View on Github external
from openpyxl import load_workbook
from time import time

# NOTE(review): Python 2 snippet (print statements, raw_input). The
# extraction dropped the closing parenthesis of the load_workbook(...) call
# below, and the processing loop after `k = 0` is missing — not runnable as
# captured. `use_iterators` is also a long-deprecated openpyxl argument.

# Read data file into memory as workbook
print 'Reading data file.'
t  = time()
ws = load_workbook(
  filename = raw_input("Enter path of GTD data file: ").strip(),
  use_iterators = True
# NOTE(review): missing `)` here in the captured snippet.
print 'Done. Time taken: %f secs.\n' % (time()-t)

inputs = []
outputs = []

# Get relevant values from database and put into input and output vectors for SVM classification
print 'Parsing database.'
t = time()

# Output files for the SVM train/test data.
train = open('svm-train.txt', 'w')
test = open('svm-test.txt', 'w')

k = 0
github stanfordjournalism / search-script-scrape / scripts / View on Github external
from lxml import html
from openpyxl import load_workbook

# NOTE(review): truncated script — `requests`, `BytesIO`, `urljoin` and
# `LANDING_PAGE_URL` are used below but their imports/definition were lost in
# extraction, and the script continues past the last captured line.

doc = html.fromstring(requests.get(LANDING_PAGE_URL).text)
# instead of using xpath, let's just use a sloppy cssselect
urls = [a.attrib.get('href') for a in doc.cssselect('a')]
# that awkward `get` is because not all anchor tags have hrefs...and
#  this is why we use xpath...
# Pick the first link that looks like the SAT results spreadsheet.
_xurl = next(url for url in urls if url and 'SAT' in url and 'xls' in url) # blargh
xlsx_url = urljoin(LANDING_PAGE_URL, _xurl)
print("Downloading", xlsx_url)
# download the spreadsheet...instead of writing to disk
# let's just keep it in memory and pass it directly to load_workbook()
xlsx = BytesIO(requests.get(xlsx_url).content)
wb = load_workbook(xlsx)
# The above command will print out a warning:
#     /site-packages/openpyxl/workbook/names/ UserWarning:
#              Discarded range with reserved name
#              warnings.warn("Discarded range with reserved name")

### Dealing with the worksheet structure
# The 2014 edition contains two worksheets, the first being "Notes"
#  and the second being "2014 SAT Results"
# Let's write an agnostic function as if we didn't know how each year's
#   spreadsheet was actually structured
sheet = next(s for s in wb.worksheets if "results" in s.title.lower())
# I don't understand openpyxl's API so I'm just going to
#   practice nested list comprehensions
# Note that the first column is just an ID field which we don't care about
rows = [[cell.value for cell in row[1:]] for row in sheet.iter_rows()]
headers = rows[0]
github saranshgupta1995 / The-Iron-Throne / LittleFinger / View on Github external
# NOTE(review): extraction residue — another call's arguments
# (`'Citidel//exp_data', writeback=True)`, which looks like a shelve-style
# open — confirm against the original repo) were fused onto the `def` line,
# and the `try:` matching the orphan `except KeyError:` was lost. Not
# runnable as captured; also truncated at the end.
def open_workbook(self):'Citidel//exp_data',writeback=True)
        self.__wb = openpyxl.load_workbook('Citidel//Exp.xlsx')
        except KeyError: