Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
while start_line < len(lines):
if not lines[start_line].isdigit():
break
end_line = self._find_text_line(start_line, lines)
timing = lines[start_line + 1].split('-->')
start = self._srttomicro(timing[0].strip(' \r\n'))
end = self._srttomicro(timing[1].strip(' \r\n'))
nodes = []
for line in lines[start_line + 2:end_line - 1]:
# skip extra blank lines
if not nodes or line != '':
nodes.append(CaptionNode.create_text(line))
nodes.append(CaptionNode.create_break())
if len(nodes):
# remove last line break from end of caption list
nodes.pop()
caption = Caption(start, end, nodes)
captions.append(caption)
start_line = end_line
caption_set = CaptionSet({lang: captions})
if caption_set.is_empty():
raise CaptionReadNoCaptions("empty caption file")
return caption_set
"""
:param inherit_from: A Layout object extracted from an ancestor tag
to be attached to leaf nodes
"""
# convert text
if isinstance(tag, NavigableString):
# BeautifulSoup apparently handles unescaping character codes
# (e.g. &) automatically. The following variable, therefore,
# should contain a plain unicode string.
# strips indentation whitespace only
pattern = re.compile("^(?:[\n\r]+\s*)?(.+)")
result = pattern.search(tag)
if not result:
return
tag_text = result.groups()[0]
self.line.append(CaptionNode.create_text(tag_text, inherit_from))
# convert line breaks
elif tag.name == 'br':
self.line.append(CaptionNode.create_break(inherit_from))
# convert italics, bold, and underline
elif tag.name == 'i' or tag.name == 'b' or tag.name == 'u':
style_name = self._get_style_name_from_tag(tag.name)
self.line.append(
CaptionNode.create_style(True, {style_name: True})
)
# recursively call function for any children elements
for a in tag.contents:
self._translate_tag(a, inherit_from)
self.line.append(
CaptionNode.create_style(False, {style_name: True}))
elif tag.name == 'span':
self._translate_span(tag, inherit_from)
def _translate_tag(self, tag):
    """Convert one parsed markup node into caption nodes on ``self.nodes``.

    Text nodes are appended as text caption nodes, ``br`` tags as break
    nodes, ``span`` tags are delegated to ``self._translate_span``, and any
    other tag is traversed recursively through ``tag.contents``.

    :param tag: a ``NavigableString`` or a tag object; text and ``br``
        nodes are expected to carry a ``layout_info`` attribute
    """
    # convert text
    if isinstance(tag, NavigableString):
        # Strips indentation whitespace only. The pattern is a raw string
        # because "\s" is not a valid escape in a plain string literal; the
        # regex engine interprets \n, \r and \s itself, so the compiled
        # pattern is unchanged.
        pattern = re.compile(r"^(?:[\n\r]+\s*)?(.+)")
        result = pattern.search(tag)
        if result:
            # Escaping/unescaping xml entities is the responsibility of the
            # xml parser used by BeautifulSoup in its initialization. The
            # content of the tag variable at this point should be a plain
            # unicode string with xml entities already converted to unicode
            # characters.
            tag_text = result.group(1)
            node = CaptionNode.create_text(
                tag_text, layout_info=tag.layout_info)
            self.nodes.append(node)
    # convert line breaks
    elif tag.name == 'br':
        self.nodes.append(
            CaptionNode.create_break(layout_info=tag.layout_info))
    # convert span
    elif tag.name == 'span':
        self._translate_span(tag)
    else:
        # recursively call function for any children elements
        for child in tag.contents:
            self._translate_tag(child)
)
# handle clone italics
elif instruction.sets_italics_off():
caption.nodes.append(
CaptionNode.create_style(
False, {'italics': True},
layout_info=_get_layout_from_tuple(
instruction.position)
))
# handle text
elif instruction.is_text_node():
layout_info = _get_layout_from_tuple(instruction.position)
caption.nodes.append(
CaptionNode.create_text(
instruction.get_text(), layout_info=layout_info),
)
caption.layout_info = layout_info
self._collection.extend(self._still_editing)
elif '' == line:
if found_timing:
if not nodes:
raise CaptionReadSyntaxError(
'Cue without content. (line %d)' % timing_line)
else:
found_timing = False
caption = Caption(
start, end, nodes, layout_info=layout_info)
captions.append(caption)
nodes = []
else:
if found_timing:
if nodes:
nodes.append(CaptionNode.create_break())
nodes.append(CaptionNode.create_text(
self._decode(line)))
else:
# it's a comment or some metadata; ignore it
pass
# Add a last caption if there are remaining nodes
if nodes:
caption = Caption(start, end, nodes, layout_info=layout_info)
captions.append(caption)
return captions