Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
options.setdefault("skip_trailing_columns", False)
options.setdefault("escape_strings", False)
options.setdefault("hyperlinks", False)
options.setdefault("include_sheet_pattern", ["^.*$"])
options.setdefault("exclude_sheet_pattern", [])
options.setdefault("exclude_hidden_sheets", False)
options.setdefault("merge_cells", False)
options.setdefault("ignore_formats", [''])
options.setdefault("lineterminator", "\n")
options.setdefault("outputencoding", "utf-8")
self.options = options
try:
self.ziphandle = zipfile.ZipFile(xlsxfile)
except (zipfile.BadZipfile, IOError):
raise InvalidXlsxFileException("Invalid xlsx file: " + str(xlsxfile))
self.py3 = sys.version_info[0] == 3
self.content_types = self._parse(ContentTypes, "/[Content_Types].xml")
self.shared_strings = self._parse(SharedStrings, self.content_types.types["shared_strings"])
self.styles = self._parse(Styles, self.content_types.types["styles"])
self.workbook = self._parse(Workbook, self.content_types.types["workbook"])
workbook_relationships = list(filter(lambda r: "book" in r, self.content_types.types["relationships"]))[0]
self.workbook.relationships = self._parse(Relationships, workbook_relationships)
if self.options['escape_strings']:
self.shared_strings.escape_strings()
def xlsx(self):
kwargs = {
'delimiter': '\t',
'skip_empty_lines': True,
'outputencoding': 'utf-8',
'hyperlinks': True,
}
sheetid = 1
try:
xlsx2csv = Xlsx2csv(self.path.as_posix(), **kwargs)
except InvalidXlsxFileException as e:
raise exc.NoDataError(f'{self.path}') from e
ns = len(xlsx2csv.workbook.sheets)
if ns > 1:
message = f'too many sheets ({ns}) in {self.path.as_posix()!r}'
if self.addError(exc.EncodingError(message),
blame='submission',
path=self.path):
logd.error(message)
f = io.StringIO()
try:
xlsx2csv.convert(f, sheetid)
f.seek(0)
gen = csv.reader(f, delimiter='\t')
yield from gen