Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def divine_format(text):
    """Infer the notebook format from its raw text content (#148).

    The checks run in this order:

    1. If ``nbformat.reads`` accepts the text, the answer is ``'ipynb'``.
    2. If a header parsed with one of the candidate comment prefixes declares
       a jupytext ``text_representation`` extension, the answer is that
       extension (without its first character) joined by ``':'`` to the first
       element returned by ``guess_format``.
    3. If any line is exactly three backticks, the answer is ``'md'``.
    4. Otherwise the answer is ``'py:'`` plus the first element returned by
       ``guess_format`` for the ``.py`` extension.
    """
    try:
        nbformat.reads(text, as_version=4)
    except nbformat.reader.NotJSONError:
        # Not a JSON notebook: fall through to the text-based heuristics.
        pass
    else:
        return 'ipynb'

    content_lines = text.splitlines()

    # Probe each candidate comment prefix until a header names an extension.
    for prefix in ['', '#'] + _COMMENT_CHARS:
        metadata, _, _, _ = header_to_metadata_and_cell(content_lines, prefix)
        representation = metadata.get('jupytext', {}).get('text_representation', {})
        extension = representation.get('extension')
        if not extension:
            continue
        return extension[1:] + ':' + guess_format(text, extension)[0]

    # No metadata, but ``` on at least one line => markdown
    if '```' in content_lines:
        return 'md'

    return 'py:' + guess_format(text, '.py')[0]
# Read the notebook at `infile`, convert it to nbformat 4 when its version is
# older than (4, 0), print a warning when its declared language is not 'R',
# and fall back to the notebook's "Rmd_header" metadata when no `header` is given.
# NOTE(review): this excerpt has lost its indentation and appears to stop before
# the end of the function (`header` is assigned but not used in the visible
# lines), so the code is left untouched.
def read_ipynb(infile, header=None):
with open(infile) as f:
node = nbformat.reader.read(f)
# ipynb format 4 is current as of IPython 3.0; update the data structure
# for consistency if it is an older version
ipynb_version = nbformat.reader.get_version(node)
if ipynb_version < (4, 0):
node = nbformat.convert(node, 4)
# The language name is looked up under metadata['language_info']['name'];
# it is None when either key is missing.
notebook_lang = node.metadata.get('language_info', {}).get('name', None)
if not notebook_lang == 'R':
print('Warning: Notebook language "{0}" != R'.format(notebook_lang))
print("Output is unlikely to be a valid Rmd document")
# to allow round-tripping, if no explicit header is specified and
# node.metadata.Rmd_header exists, dump it as a YAML header
if header is None:
if "Rmd_header" in node.metadata:
# header will consist of NotebookNode rather than dict objects
# we added a representer function for these above
header = node.metadata["Rmd_header"]
def _get_notebook_node(self):  # pragma: no cover
    """Parse the captured notebook data and return the resulting node.

    The version reported by ``reader.get_version`` for the parsed node is
    stored on ``self.nbversion`` as a side effect.

    Raises:
        Exception: if the captured notebook data has length zero.
    """
    if not len(self._notebook_data):
        raise Exception("Captured buffer size for notebook node is zero.")

    notebook = reader.reads(self._notebook_data)
    self.nbversion = reader.get_version(notebook)
    return notebook
def _get_notebook_node(self):  # pragma: no cover
    """Return the notebook node parsed from ``self._notebook_data``.

    Records the parsed node's version on ``self.nbversion`` before
    returning. An empty capture buffer is rejected with ``Exception``.
    """
    # Guard clause: nothing was captured, so there is nothing to parse.
    if len(self._notebook_data) < 1:
        raise Exception("Captured buffer size for notebook node is zero.")

    parsed = reader.reads(self._notebook_data)
    self.nbversion = reader.get_version(parsed)
    return parsed
# NOTE(review): fragment of a method body -- the enclosing def and the original
# indentation are not visible here, so the code lines are left untouched.
logger.debug("converting '{}'".format(file_path))
# Snapshot the counters so the checks at the end can tell whether this
# conversion changed them.
success_count = result_counter.success_count
fail_count = result_counter.fail_count
source_info_record_base = self.__get_source_info_base(file_path.realpath())
source_info_record_base.source_id = self._fetch_next_source_id()
# Notebook path: taken when the configured format name is one of
# IPYNB_FORMAT_NAME_LIST or the path itself is recognised as an ipynb file.
if self._format_name in IPYNB_FORMAT_NAME_LIST or is_ipynb_file_path(file_path):
import nbformat
try:
changed_table_name_set = self._convert_nb(
nb=load_ipynb_file(file_path, encoding=self._encoding),
source_info=source_info_record_base,
)
# A notebook that fails to load or convert is logged and the method returns.
except (nbformat.reader.NotJSONError, RuntimeError) as e:
logger.error(e)
return
# One SourceInfo row is inserted per changed table, each built from a deep
# copy of the base record.
for table_name in changed_table_name_set:
record = deepcopy(source_info_record_base)
record.format_name = "ipynb"
record.dst_table = table_name
SourceInfo.insert(record)
# Presumably pairs with the `if` above (non-notebook inputs); the nesting of the
# lines that follow cannot be confirmed without the original indentation.
else:
self.__convert(file_path, source_info_record_base)
# Return when the failure counter grew since the snapshot.
if result_counter.fail_count > fail_count:
return
# Warn when the success counter did not change since the snapshot.
if result_counter.success_count == success_count:
logger.warn(TABLE_NOT_FOUND_MSG_FORMAT.format(file_path))
# NOTE(review): fragment of a larger function -- the enclosing def and the
# original indentation are not visible here, so the code lines are left untouched.
# Start from an empty string: every handler below only prints, so a failure
# falls through to `return text` with this default.
text = ""
try:
# Fetch the object via retry_s3, read obj["Body"] through get_bytes, and decode
# the resulting bytes as UTF-8.
obj = retry_s3(
"get",
bucket,
key,
size,
etag=etag,
s3_client=s3_client,
version_id=version_id
)
data = get_bytes(obj["Body"], compression)
notebook = data.getvalue().decode("utf-8")
try:
text = extract_text(notebook)
except (json.JSONDecodeError, nbformat.reader.NotJSONError):
print(f"Invalid JSON in {key}.")
except (KeyError, AttributeError) as err:
print(f"Missing key in {key}: {err}")
# there might be more errors than covered by test_read_notebook
# better not to fail altogether
except Exception as exc:#pylint: disable=broad-except
print(f"Exception in file {key}: {exc}")
# Presumably paired with the outer try, covering the .decode("utf-8") call
# above -- confirm against the properly indented source.
except UnicodeDecodeError as uni:
print(f"Unicode decode error in {key}: {uni}")
return text
def _get_notebook_node(self):  # pragma: no cover
    """Load the captured notebook data as a notebook node.

    Returns:
        The node produced by ``reader.reads`` for ``self._notebook_data``;
        its version is also saved to ``self.nbversion``.

    Raises:
        Exception: when ``self._notebook_data`` is empty.
    """
    buffer_is_empty = len(self._notebook_data) == 0
    if buffer_is_empty:
        raise Exception("Captured buffer size for notebook node is zero.")

    nb_node = reader.reads(self._notebook_data)
    self.nbversion = reader.get_version(nb_node)
    return nb_node
def has_environment(nb_file):
    """Report whether a notebook's metadata contains an 'environment' entry.

    Args:
        nb_file: Path of the notebook file to inspect.

    Returns:
        True when the file parses as a notebook whose ``metadata`` mapping
        has an ``'environment'`` key. False when ``nbformat`` is None, or
        when the file cannot be opened, decoded or parsed, or lacks the
        expected structure.
    """
    if nbformat is None:
        return False

    # Function-scope import keeps this block self-contained; io.open accepts
    # ``encoding`` on both Python 2 and Python 3.
    import io

    try:
        # Notebook files are UTF-8 encoded JSON; reading with the locale's
        # default encoding can fail or mis-decode on some platforms.
        with io.open(nb_file, encoding='utf-8') as fb:
            data = fb.read()
        nb = nbformat.reader.reads(data)
        return 'environment' in nb['metadata']
    except (AttributeError, KeyError):
        # The parsed document does not have the expected shape.
        return False
    except (IOError, UnicodeDecodeError, nbformat.reader.NotJSONError):
        # Unreadable, undecodable or non-JSON file: this is a best-effort
        # predicate, so answer False rather than propagate the error.
        return False
# Read the notebook at `infile`, convert it to nbformat 4 when its version is
# older than (4, 0), and print a warning when its declared language is not 'R'.
# NOTE(review): this excerpt has lost its indentation and is cut off on the
# final `if` line (its body is not visible), so the code is left untouched.
def read_ipynb(infile, header=None):
with open(infile) as f:
node = nbformat.reader.read(f)
# ipynb format 4 is current as of IPython 3.0; update the data structure
# for consistency if it is an older version
ipynb_version = nbformat.reader.get_version(node)
if ipynb_version < (4, 0):
node = nbformat.convert(node, 4)
# The language name is looked up under metadata['language_info']['name'];
# it is None when either key is missing.
notebook_lang = node.metadata.get('language_info', {}).get('name', None)
if not notebook_lang == 'R':
print('Warning: Notebook language "{0}" != R'.format(notebook_lang))
print("Output is unlikely to be a valid Rmd document")
# to allow round-tripping, if no explicit header is specified and
# node.metadata.Rmd_header exists, dump it as a YAML header
if header is None:
if "Rmd_header" in node.metadata:
def load_ipynb_file(file_path, encoding):
    """Read the notebook stored at ``file_path`` as nbformat version 4.

    Args:
        file_path: Path of the notebook file to open.
        encoding: Text encoding passed to ``io.open``.

    Returns:
        The object returned by ``nbformat.read(..., as_version=4)``.

    Raises:
        nbformat.reader.NotJSONError: raised in place of an
            ``AttributeError`` coming out of ``nbformat.read``.
        IOError: re-raised after ``_schema_not_found_error_handler`` has
            been called with it.
    """
    with io.open(file_path, encoding=encoding) as stream:
        try:
            notebook = nbformat.read(stream, as_version=4)
        except AttributeError as error:
            # Surface the failure as the "not JSON" error type instead.
            raise nbformat.reader.NotJSONError(msgfy.to_error_message(error))
        except IOError as error:
            _schema_not_found_error_handler(error)
            raise

    return notebook