How to use the pycaption.WebVTTReader class in pycaption

To help you get started, we’ve selected a few pycaption examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pbs / pycaption / tests / test_webvtt.py View on Github external
def test_ignoring_timing_errors(self):
        # Even if timing errors are ignored, this has to raise an exception
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            ("\nNOTE invalid cue stamp\n"
             "00:00:20.000 --> \n"
             "foo bar baz\n")
        )

        # And this too
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            ("\n00:00:20,000 --> 00:00:22,000\n"
             "Note the comma instead of point.\n")
        )

        try:
            WebVTTReader().read(
                ("\n"
github pbs / pycaption / tests / test_webvtt_conversion.py View on Github external
def test_cue_settings_are_kept(self):
        """Round-trip the cue-settings sample through reader and writer.

        Reads SAMPLE_WEBVTT_WITH_CUE_SETTINGS with WebVTTReader, writes the
        resulting caption set back out with WebVTTWriter, and asserts that
        the written text equals the original sample.
        """
        original = SAMPLE_WEBVTT_WITH_CUE_SETTINGS

        parsed = WebVTTReader().read(original)
        round_tripped = WebVTTWriter().write(parsed)

        self.assertEqual(original, round_tripped)
github pbs / pycaption / tests / test_webvtt.py View on Github external
def test_invalid_files(self):
        self.assertRaises(
            CaptionReadSyntaxError,
            WebVTTReader().read,
            ("\nNOTE Cues without text are invalid.\n"
                "00:00:20.000 --> 00:00:30.000\n"
                "\n"
                "00:00:40.000 --> 00:00:50.000\n"
                "foo bar baz\n")
        )

        self.assertRaises(
            CaptionReadError,
            WebVTTReader(ignore_timing_errors=False).read,
            ("00:00:20.000 --> 00:00:10.000\n"
                "Start time is greater than end time.")
        )

        self.assertRaises(
            CaptionReadError,
github pbs / pycaption / tests / test_webvtt_conversion.py View on Github external
def test_webvtt_to_srt_conversion(self):
        """Convert the WebVTT sample to SRT and compare with the SRT sample.

        The writer output must be a ``unicode`` object and must match
        SAMPLE_SRT according to ``assertSRTEquals``.
        """
        parsed = WebVTTReader().read(SAMPLE_WEBVTT)
        srt_text = SRTWriter().write(parsed)

        # Check the type first, then the content.
        is_text = isinstance(srt_text, unicode)
        self.assertTrue(is_text)
        self.assertSRTEquals(SAMPLE_SRT, srt_text)
github pbs / pycaption / tests / test_webvtt_conversion.py View on Github external
def test_positioning_is_kept(self):
        """Round-trip the positioning sample through reader and writer.

        Reads SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING with WebVTTReader,
        writes the caption set back out with WebVTTWriter, and asserts that
        the written text equals the original sample.
        """
        caption_set = WebVTTReader().read(
            SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING)
        results = WebVTTWriter().write(caption_set)
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(
            SAMPLE_WEBVTT_FROM_DFXP_WITH_POSITIONING, results)
github mkly / youtube-closed-captions / ytcc / download.py View on Github external
def get_captions_from_output(self, output: str, language: str = 'en') -> str:
        """Flatten WebVTT text into a single space-separated line of captions.

        :param output: WebVTT content handed to ``WebVTTReader.read``.
        :param language: language passed both to ``read`` and to
            ``get_captions``; defaults to ``'en'``.
        :return: the caption lines joined by single spaces, with runs of
            identical consecutive lines collapsed to one occurrence.
        """
        reader = WebVTTReader()

        # Each caption is stringified, its literal backslash-n sequences are
        # turned into real newlines, and the result is passed through
        # self.remove_time_from_caption (presumably strips the timing
        # prefix -- confirm against that helper). join avoids repeated
        # string concatenation inside the loop.
        text = ''.join(
            self.remove_time_from_caption(
                str(caption).replace(r'\n', "\n"))
            for caption in reader.read(output, language).get_captions(language)
        )

        # Keep a line only when it differs from the one right before it.
        # ``previous`` starts as '' so a leading empty line is dropped.
        kept = []
        previous = ''
        for line in text.split("\n"):
            if line != previous:
                kept.append(line)
            previous = line

        return ' '.join(kept)
github aussieaddons / plugin.video.abc_iview / resources / lib / play.py View on Github external
if p.is_captions():
            captions_url = stream_data.get('captions_url')
            profile = xbmcaddon.Addon().getAddonInfo('profile')
            path = xbmc.translatePath(profile)
            if not os.path.isdir(path):
                os.makedirs(path)
            caption_file = os.path.join(path, 'subtitles.eng.srt')
            if os.path.isfile(caption_file):
                os.remove(caption_file)

            try:
                sess = session.Session()
                webvtt_data = sess.get(captions_url).text
                if webvtt_data:
                    with io.BytesIO() as buf:
                        webvtt_captions = WebVTTReader().read(webvtt_data)
                        srt_captions = SRTWriter().write(webvtt_captions)
                        srt_unicode = srt_captions.encode('utf-8')
                        buf.write(srt_unicode)
                        with io.open(caption_file, "wb") as f:
                            f.write(buf.getvalue())
                if hasattr(listitem, 'setSubtitles'):
                    listitem.setSubtitles([caption_file])
            except Exception as e:
                utils.log(
                    'Subtitles not available for this program: {0}'.format(e))

        if hasattr(listitem, 'addStreamInfo'):
            listitem.addStreamInfo('audio', p.get_kodi_audio_stream_info())
            listitem.addStreamInfo('video', p.get_kodi_video_stream_info())

        xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, listitem=listitem)
github joseph-zhong / LipReading / src / utils / data / caption.py View on Github external
def extract_captions(cap_fname, lang='en-US'):
  """ Reads a list of captions and returns an ordered dictionary of {(start_time, end_time) -> "caption"}
  with time in units of seconds.

  :param cap_fname: VTT subtitle file to read from. Produces Caption sets with text, and times in microseconds.
  """
  assert os.path.isfile(cap_fname)
  _getSharedLogger().info("Reading captions from '%s'", cap_fname)
  reader = pycaption.WebVTTReader()
  res = collections.OrderedDict()
  with open(cap_fname) as fin:
    captions_raw = fin.read()
    assert reader.detect(captions_raw), "Malformed file: '{}'".format(cap_fname)

    caption_set = reader.read(captions_raw)
    assert not caption_set.is_empty(), "Empty VTT file: '{}'".format(cap_fname)
    # REVIEW josephz: We'll need to check what other possibilities there are.
    assert lang in caption_set.get_languages()

    captions = caption_set.get_captions(lang=lang)
    assert len(captions) > 0

  _getSharedLogger().info("Detected '%s' captions...", len(captions))
  for c in captions:
    cap_raw = c.get_text()
github alexkohler / ytgrep / ytcc / download.py View on Github external
def get_captions_from_output(self, output: str, url: str) -> str:
        """Turn WebVTT text into caption lines, optionally searching them.

        :param output: WebVTT content handed to ``WebVTTReader.read``.
        :param url: forwarded to ``remove_time_from_caption`` and
            ``process_captions``.
        :return: all caption lines concatenated when ``self.search_query``
            is the empty string; otherwise whatever
            ``self.process_captions`` returns for the collected lines.
        """
        caption_set = WebVTTReader().read(output)

        # One newline-terminated entry per 'en-US' caption; literal
        # backslash-n sequences in the caption text become spaces.
        captions = [
            self.remove_time_from_caption(
                url, str(entry).replace(r'\n', " ")) + "\n"
            for entry in caption_set.get_captions('en-US')
        ]

        if self.search_query != '':
            return self.process_captions(captions, url)

        return ''.join(captions)