Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
def test_stream_format_sql_table_is_required_error(database_url):
    """Opening an SQL source without a `table` option must raise a
    TabulatorException whose message names the missing option."""
    with pytest.raises(exceptions.TabulatorException) as error_info:
        stream = Stream(database_url)
        stream.open()
    assert 'table' in str(error_info.value)
Defaults to False.
# Raises
exceptions.TabulatorException: If the stream is closed.
# Returns
Iterator[Union[List[Any], Dict[str, Any], Tuple[int, List[str], List[Any]]]]:
The row itself. The format depends on the values of `keyed` and
`extended` arguments.
"""
# Error if closed
if self.closed:
message = 'Stream is closed. Please call "stream.open()" first.'
raise exceptions.TabulatorException(message)
# Create iterator
iterator = chain(
self.__sample_extended_rows,
self.__parser.extended_rows)
iterator = self.__apply_processors(iterator)
# Yield rows from iterator
for row_number, headers, row in iterator:
if row_number > self.__row_number:
self.__row_number = row_number
if extended:
yield (row_number, headers, row)
elif keyed:
yield dict(zip(headers, row))
else:
def headers(self):
    """None/list: table headers extracted from the sample.

    # Raises
        exceptions.TabulatorException: if sampling is disabled
            (`sample_size=0`), since headers come from the sample.
    """
    if self.__sample_size:
        return self.__headers_list
    raise exceptions.TabulatorException(
        'Headers can\'t be extracted when sample_size=0')
keyed_source = True
self.__sample_extended_rows.append((number, headers, row))
except StopIteration:
break
# Detect html content
if not keyed_source:
text = ''
for number, headers, row in self.__sample_extended_rows:
for value in row:
if isinstance(value, six.string_types):
text += value
html_source = helpers.detect_html(text)
if html_source:
msg = 'Source has been detected as HTML (not supported)'
raise exceptions.TabulatorException(msg)
# Extract headers
if self.__headers_row:
for number, headers, row in self.__sample_extended_rows:
if number == self.__headers_row:
if keyed_source:
self.__headers_list = headers
else:
self.__headers_list = row
# Remove headers from sample
if not keyed_source:
self.__sample_extended_rows = self.__sample_extended_rows[
self.__headers_row:]
def __init__(self, table=None, **options):
    """SQL parser/stream constructor: a `table` option is mandatory.

    # Arguments
        table (str): name of the database table to read from.
        **options: backend options accepted by the caller; stored by
            whoever consumes them, not inspected here.

    # Raises
        exceptions.TabulatorException: if `table` is not provided.
    """
    # Ensure table was supplied -- the sql format can't work without it
    if table is None:
        raise exceptions.TabulatorException('Format `sql` requires `table` option.')
    self.__table = table
    # NOTE: the original trailing `pass` was dead code and has been removed.
class IOError(TabulatorException):
    # NOTE: intentionally shadows the builtin `IOError` inside this module.
    """Raised when a source can't be loaded from the local filesystem."""
class HTTPError(IOError):
    """Raised when a remote source can't be loaded over HTTP."""
class SourceError(TabulatorException):
    """Raised when the source file can't be parsed correctly."""
class SchemeError(TabulatorException):
    """Raised when the file scheme is not supported."""
class FormatError(TabulatorException):
    """Raised when the file format is unsupported or invalid."""
def __extract_headers(self):
    """Populate ``self.__headers`` from the sampled extended rows.

    ``self.__headers_row`` selects which sample row supplies the headers
    (compared against the row numbers stored in the sample -- presumably
    1-based; verify against the sampler). Rows after the headers row only
    fill in header cells that are still empty.

    # Raises
        exceptions.TabulatorException: if the headers row lies beyond
            the collected sample (``sample_size``).
    """
    # Headers row is not set -- nothing to extract
    if not self.__headers_row:
        return
    # Sample is too short to contain the requested headers row
    if self.__headers_row > self.__sample_size:
        message = 'Headers row (%s) can\'t be more than sample_size (%s)'
        message = message % (self.__headers_row, self.__sample_size)
        raise exceptions.TabulatorException(message)
    # Get headers from data
    keyed_source = False
    for row_number, headers, row in self.__sample_extended_rows:
        # A non-None headers value marks a keyed source; once one row is
        # keyed, every subsequent row is treated as keyed too.
        keyed_source = keyed_source or headers is not None
        # Keyed sources carry headers alongside the row; otherwise the
        # row's own cells are the header candidates.
        headers = headers if keyed_source else row
        # Normalize header cells to stripped text.
        # NOTE(review): this writes back into the sampled row list in
        # place -- the side effect looks intentional, but confirm.
        for index, header in enumerate(headers):
            if header is not None:
                headers[index] = six.text_type(header).strip()
        if row_number == self.__headers_row:
            self.__headers = headers
        if row_number > self.__headers_row:
            # Rows after the headers row only fill cells that are still
            # empty/falsy in the extracted headers.
            for index in range(0, len(self.__headers)):
                if len(headers) > index and headers[index] is not None:
                    if not self.__headers[index]:
                        self.__headers[index] = headers[index]
_stream = tabulator.Stream(__url, **_params,
post_parse=[suffix_remover(format),
add_constants(constant_headers, constant_values, _columns)])
retry = 0
backoff = 2
while True:
try:
_stream.open()
_headers = dedupe(_stream.headers)
__columns = len(_headers)
_headers = dedupe(_headers + constant_headers)
_schema = __resource.get('schema')
if _schema is not None:
_schema = Schema(_schema)
return _schema, _headers, __columns, _stream, _stream.close
except tabulator.exceptions.TabulatorException as e:
logging.warning("Error while opening resource from url %s: %r",
_url, e)
_stream.close()
retry += 1
if retry <= 3:
logging.warning("Retrying after %d seconds (%d/3)", backoff, retry)
time.sleep(backoff)
backoff *= 2
continue
else:
if not _ignore_missing:
raise
return {}, [], 0, [], lambda: None
return opener