Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Verifies that WebVTTReader.read raises CaptionReadSyntaxError for malformed
# cue timing lines.
# NOTE(review): this excerpt has lost its indentation and is cut off inside the
# final try/read call, so the remainder of the test is not visible here.
def test_ignoring_timing_errors(self):
# Even if timing errors are ignored, this has to raise an exception
# (the cue below has a start timestamp but no end timestamp).
# NOTE(review): presumably a default WebVTTReader() ignores timing errors --
# confirm against the reader's constructor.
self.assertRaises(
CaptionReadSyntaxError,
WebVTTReader().read,
("\nNOTE invalid cue stamp\n"
"00:00:20.000 --> \n"
"foo bar baz\n")
)
# And this too: the timestamps use a comma instead of a point before the
# milliseconds.
self.assertRaises(
CaptionReadSyntaxError,
WebVTTReader().read,
("\n00:00:20,000 --> 00:00:22,000\n"
"Note the comma instead of point.\n")
)
try:
WebVTTReader().read(
("\n"
def test_cue_settings_are_kept(self):
    """Round-trip the cue-settings sample through reader and writer.

    The text produced by WebVTTWriter must equal the WebVTT sample that
    was fed to WebVTTReader.
    """
    parsed = WebVTTReader().read(SAMPLE_WEBVTT_WITH_CUE_SETTINGS)
    round_tripped = WebVTTWriter().write(parsed)
    self.assertEqual(SAMPLE_WEBVTT_WITH_CUE_SETTINGS, round_tripped)
# Verifies that WebVTTReader.read rejects invalid input with the expected
# exception type.
# NOTE(review): this excerpt has lost its indentation and is cut off inside the
# third assertRaises call, so the remaining cases are not visible here.
def test_invalid_files(self):
# A cue that has a timing line but no text must raise CaptionReadSyntaxError.
self.assertRaises(
CaptionReadSyntaxError,
WebVTTReader().read,
("\nNOTE Cues without text are invalid.\n"
"00:00:20.000 --> 00:00:30.000\n"
"\n"
"00:00:40.000 --> 00:00:50.000\n"
"foo bar baz\n")
)
# With ignore_timing_errors=False, a cue whose start time is later than its
# end time must raise CaptionReadError.
self.assertRaises(
CaptionReadError,
WebVTTReader(ignore_timing_errors=False).read,
("00:00:20.000 --> 00:00:10.000\n"
"Start time is greater than end time.")
)
self.assertRaises(
CaptionReadError,
def test_webvtt_to_srt_conversion(self):
    """Convert the WebVTT sample to SRT and compare with the SRT sample.

    The writer output must be a ``unicode`` object and must match
    SAMPLE_SRT according to ``assertSRTEquals``.
    """
    parsed = WebVTTReader().read(SAMPLE_WEBVTT)
    srt_text = SRTWriter().write(parsed)
    self.assertTrue(isinstance(srt_text, unicode))
    self.assertSRTEquals(SAMPLE_SRT, srt_text)
def test_positioning_is_kept(self):
    """Round-trip the positioned WebVTT sample through reader and writer.

    The text produced by WebVTTWriter must equal the sample that was fed
    to WebVTTReader.
    """
    caption_set = WebVTTReader().read(
        SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING)
    results = WebVTTWriter().write(caption_set)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(
        SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING, results)
def get_captions_from_output(self, output: str, language: str = 'en') -> str:
    """Flatten WebVTT text into one space-separated string of caption lines.

    ``output`` is parsed with WebVTTReader for ``language``. Each caption's
    string form has literal backslash-n sequences turned into newlines and
    is passed through ``self.remove_time_from_caption``. The pieces are
    concatenated and split into lines, and any line equal to the line
    directly before it is dropped (the comparison starts from an empty
    string, so leading empty lines are dropped too). The kept lines are
    returned joined by single spaces.
    """
    caption_set = WebVTTReader().read(output, language)
    merged = ''.join(
        self.remove_time_from_caption(str(cue).replace(r'\n', "\n"))
        for cue in caption_set.get_captions(language)
    )

    kept_lines = []
    last_seen = ''
    for text_line in merged.split("\n"):
        if text_line != last_seen:
            kept_lines.append(text_line)
            last_seen = text_line
    return ' '.join(kept_lines)
# Attaches subtitles and stream info to `listitem`, then resolves it for
# playback.
# NOTE(review): this is a fragment -- the enclosing function header is outside
# the excerpt and indentation has been lost, so nesting must be inferred.
# When the program reports captions, download them as WebVTT and convert them
# to an SRT file.
if p.is_captions():
captions_url = stream_data.get('captions_url')
# The subtitle file lives in the add-on's profile directory, which is created
# when it does not exist yet.
profile = xbmcaddon.Addon().getAddonInfo('profile')
path = xbmc.translatePath(profile)
if not os.path.isdir(path):
os.makedirs(path)
caption_file = os.path.join(path, 'subtitles.eng.srt')
# Delete any existing subtitle file before a new one is written.
if os.path.isfile(caption_file):
os.remove(caption_file)
# Best effort: any exception raised while fetching or converting is logged by
# the except clause below instead of propagating.
try:
sess = session.Session()
webvtt_data = sess.get(captions_url).text
if webvtt_data:
with io.BytesIO() as buf:
webvtt_captions = WebVTTReader().read(webvtt_data)
srt_captions = SRTWriter().write(webvtt_captions)
# Encode the SRT text as UTF-8 bytes because the file is opened in binary mode.
srt_unicode = srt_captions.encode('utf-8')
buf.write(srt_unicode)
with io.open(caption_file, "wb") as f:
f.write(buf.getvalue())
# setSubtitles is only called when listitem provides that method.
if hasattr(listitem, 'setSubtitles'):
listitem.setSubtitles([caption_file])
except Exception as e:
utils.log(
'Subtitles not available for this program: {0}'.format(e))
# addStreamInfo is only called when listitem provides that method.
if hasattr(listitem, 'addStreamInfo'):
listitem.addStreamInfo('audio', p.get_kodi_audio_stream_info())
listitem.addStreamInfo('video', p.get_kodi_video_stream_info())
# Pass the listitem to xbmcplugin.setResolvedUrl with the integer taken from
# sys.argv[1] (presumably the plugin handle -- confirm against the Kodi API).
xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=listitem)
# NOTE(review): this excerpt has lost its indentation and ends inside the
# caption loop; `res` is created but the code that fills and returns it is not
# visible here.
# NOTE(review): all validation below uses `assert`, which is stripped when
# Python runs with -O.
def extract_captions(cap_fname, lang='en-US'):
""" Reads a list of captions and returns an ordered dictionary of {(start_time, end_time) -> "caption"}
with time in units of seconds.
:param cap_fname: VTT subtitle file to read from. Produces Caption sets with text, and times in microseconds.
"""
assert os.path.isfile(cap_fname)
_getSharedLogger().info("Reading captions from '%s'", cap_fname)
reader = pycaption.WebVTTReader()
res = collections.OrderedDict()
# The file is opened in text mode without an explicit encoding.
# NOTE(review): presumably UTF-8 input is expected -- confirm.
with open(cap_fname) as fin:
captions_raw = fin.read()
# reader.detect() must accept the content before it is parsed.
assert reader.detect(captions_raw), "Malformed file: '{}'".format(cap_fname)
caption_set = reader.read(captions_raw)
assert not caption_set.is_empty(), "Empty VTT file: '{}'".format(cap_fname)
# REVIEW josephz: We'll need to check what other possibilities there are.
assert lang in caption_set.get_languages()
captions = caption_set.get_captions(lang=lang)
assert len(captions) > 0
_getSharedLogger().info("Detected '%s' captions...", len(captions))
for c in captions:
cap_raw = c.get_text()
def get_captions_from_output(self, output: str, url: str) -> str:
    """Turn WebVTT text into cleaned caption strings for ``url``.

    ``output`` is parsed with WebVTTReader and its 'en-US' captions are
    read. Each caption's string form has literal backslash-n sequences
    replaced by spaces, is passed through ``self.remove_time_from_caption``
    together with ``url``, and gets a trailing newline. When
    ``self.search_query`` is the empty string the pieces are returned
    concatenated; otherwise the list is handed to
    ``self.process_captions`` and its result is returned.
    """
    cues = WebVTTReader().read(output).get_captions('en-US')
    captions = [
        self.remove_time_from_caption(url, str(cue).replace(r'\n', " ")) + "\n"
        for cue in cues
    ]
    if self.search_query == '':
        return ''.join(captions)
    return self.process_captions(captions, url)