How to use the ykdl.util.match.match1 function in ykdl

To help you get started, we’ve selected a few ykdl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github coslyk / moonplayer / src / plugins / ykdl_patched.py View on Github external
addr, port = args.socks_proxy.split(':')
            socks.set_default_proxy(socks.SOCKS5, addr, int(port))
            socket.socket = socks.socksocket
        except:
            print('Failed to set socks5 proxy. Please install PySocks.', file=sys.stderr)

    opener = build_opener(*handlers)
    install_opener(opener)

    m, u = url_to_module(args.video_url)
    info = m.parser(u)

    # Is a playlist?
    if m.list_only():
        video_list = m.prepare_list()
        result = [ {'title': match1(get_content(url), r'<title>(.+?)</title>'), 'url': url} for url in video_list ]
    else:
        result = info.jsonlize()
    print(json.dumps(result, indent=4, sort_keys=True, ensure_ascii=False))
github ForgQi / bilibiliupload / ykdl / extractors / iqiyi / live.py View on Github external
def prepare(self):
        info = VideoInfo(self.name, True)
        html = get_content(self.url)
        self.vid = match1(html, '"qipuId":(\d+),')
        title = match1(html, '"roomTitle":"([^"]+)",')
        artist = match1(html, '"anchorNickname":"([^"]+)",')
        info.title = u'{} - {}'.format(title, artist)
        info.artist = artist

        data = getlive(self.vid)
        self.logger.debug('data:\n' + str(data))
        assert data['code'] == 'A00000', data.get('msg', 'can\'t play this live video!!')
        data = data['data']

        for stream in data['streams']:
            stream_type = stream['steamType']  # typo 'streamType' to 'steamType'
            stream_id = self.type_2_id[stream_type]

            if stream['formatType'] == 'HLFLV':
                stream_params = stream['url'].split('?')[-1]
                stream_params_dict = dict((k, v[0]) for k, v in parse_qs(stream_params).items())
                if stream_params_dict['hl_sttp'] != 'flv':
github ForgQi / bilibiliupload / ykdl / util / html.py View on Github external
content_encoding = None
    else:
        content_encoding = None
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    if charset == 'ignore':
        return data

    # Decode the response body
    if charset is None:
        if 'Content-Type' in resheader:
            charset = match1(resheader['Content-Type'], r'charset=([\w-]+)')
        charset = charset or match1(str(data), r'charset=\"([\w-]+)', 'charset=([\w-]+)') or 'utf-8'
    logger.debug("get_content> Charset: " + charset)
    try:
        data = data.decode(charset, errors='replace')
    except:
        logger.warning("wrong charset for {}".format(url))
    return data
github ForgQi / bilibiliupload / ykdl / extractors / bilibili / __init__.py View on Github external
def get_extractor(url):
    """Select the bilibili sub-extractor that should handle *url*.

    Live, "vc" and bangumi URLs are recognised from the URL text and are
    returned untouched together with their extractor.  Any other URL is
    rebuilt into a canonical ``https://www.bilibili.com/av<ID>/`` form
    (with ``?p=<N>`` for pages other than the first), passed through
    ``get_location`` and then dispatched to either the bangumi or the
    plain video extractor depending on the resulting URL.

    Returns:
        A ``(site, url)`` tuple: the ``site`` object of the chosen
        sub-module and the URL it should be given.
    """
    if 'live.bilibili' in url:
        from . import live as s
        return s.site, url
    elif 'vc.bilibili' in url:
        from . import vc as s
        return s.site, url
    elif '/bangumi/' in url:
        from . import bangumi as s
        return s.site, url

    # Raw strings: '\d', '\?' and '\.' are not valid Python string escapes,
    # so the non-raw spelling triggers invalid-escape warnings.
    av_id = match1(url, r'(?:/av|aid=)(\d+)')
    page_index = match1(url, r'(?:page|\?p)=(\d+)', r'index_(\d+)\.') or '1'
    if page_index == '1':
        url = 'https://www.bilibili.com/av{}/'.format(av_id)
    else:
        url = 'https://www.bilibili.com/av{}/?p={}'.format(av_id, page_index)
    add_header('Referer', 'https://www.bilibili.com/')
    # NOTE(review): get_location presumably returns the post-redirect URL,
    # which is why '/bangumi/' is checked a second time below - confirm.
    url = get_location(url)

    if '/bangumi/' in url:
        from . import bangumi as s
    else:
        from . import video as s

    return s.site, url
github ForgQi / bilibiliupload / ykdl / extractors / qq / video.py View on Github external
def qq_get_final_url(url, vid, fmt_id, filename, fvkey, platform):
    """Build the playable URL for one file by asking the ``getkey`` endpoint.

    The response is expected to contain ``QZOutputJson=<json>;``; the JSON
    part is parsed and its ``key`` field (falling back to *fvkey*) is
    appended to ``url + filename`` as the ``vkey`` query parameter.

    Returns:
        ``(final_url, vip)`` where ``final_url`` is ``None`` when no key is
        available and ``vip`` is ``True`` when the response's ``msg`` field
        equals ``'not pay'``.
    """
    query = urlencode({
        'appver': PLAYER_VERSION,
        'otype': 'json',
        'platform': platform,
        'filename': filename,
        'vid': vid,
        'format': fmt_id,
    })
    response = get_content('http://vv.video.qq.com/getkey?' + query)
    payload = json.loads(match1(response, r'QZOutputJson=(.+);$'))

    key = payload.get('key', fvkey)
    final_url = '{}{}?vkey={}'.format(url, filename, key) if key else None
    needs_payment = payload.get('msg') == 'not pay'

    return final_url, needs_payment
github ForgQi / bilibiliupload / ykdl / extractors / netease / video.py View on Github external
def prepare(self):
        """Collect title and stream URLs for a NetEase video.

        When ``self.vid`` is not set yet, the topic id and video id are
        taken from the page URL, falling back to the page HTML for whichever
        of the two is missing, and stored as ``self.vid = (topicid, vid)``.
        The per-video XML descriptor is then fetched and scanned for one
        ``<{type}Url><flv>`` entry per supported stream type.

        Returns:
            The populated ``VideoInfo`` object.
        """
        info = VideoInfo(self.name)

        if not self.vid:
            # Query-string values run up to the next '&'.
            topicid = match1(self.url, 'topicid=([^&]+)')
            vid = match1(self.url, 'vid=([^&]+)')

            if not topicid or not vid:
                html = get_content(self.url)
                topicid = topicid or match1(html, 'topicid : \"([^\"]+)', 'topicid=([^&]+)')
                vid = vid or match1(html, 'vid : \"([^\"]+)', 'vid=([^&]+)')

            self.vid = (topicid, vid)

        topicid, _vid = self.vid
        # The last two characters of the video id form the two path segments
        # that precede the file name in the descriptor URL.
        code = _vid[-2:]
        video_xml = get_content('http://xml.ws.126.net/video/{}/{}/{}_{}.xml'.format(code[0], code[1], topicid, _vid))
        info.title = compact_unquote(match1(video_xml, '<title>([^<]+)'))

        for tp in self.sopported_stream_types:
            searchcode = '<{}Url><flv>([^<]+)'.format(tp)
            url = match1(video_xml, searchcode)
            if url:
                info.stream_types.append(self.stream_2_id[tp])
                info.streams[self.stream_2_id[tp]] = {'container': 'flv', 'video_profile': self.stream_2_profile[tp], 'src' : [url], 'size': 0}
        return info
github ForgQi / bilibiliupload / ykdl / extractors / weibo.py View on Github external
def get_title(self):
        """Extract the title and decode backslash escapes it contains.

        When ``self.title_patterns`` is set, the title is first taken from
        ``self.html`` with those patterns.  The title text is then parsed as
        the body of a triple-quoted Python string literal so that sequences
        such as ``\\uXXXX`` or ``\\"`` become the characters they stand for.

        SECURITY: the title comes from scraped page content, so it must not
        be passed to ``exec``/``eval``.  ``ast.literal_eval`` performs the
        same string-literal parsing without executing statements.  If the
        text does not form a valid literal (for example it ends with ``"``
        or a lone backslash), the title is left undecoded instead of raising.

        Raises:
            AttributeError: if the title is ``None`` (no pattern matched).
        """
        import ast

        if self.title_patterns:
            self.info.title = match1(self.html, *self.title_patterns)

        raw_title = self.info.title
        # Drop embedded triple quotes so the text cannot terminate the
        # surrounding literal early.
        literal_source = '"""%s"""' % raw_title.replace('"""', '')
        try:
            self.info.title = ast.literal_eval(literal_source)
        except (SyntaxError, ValueError):
            # Not a well-formed literal body: keep the raw title unchanged.
            pass
github ForgQi / bilibiliupload / ykdl / extractors / weibo.py View on Github external
# Mobile ver.
        if 'm.weibo.cn' in self.url:
            self.title_patterns = '"content2": "(.+?)",', '"status_title": "(.+?)",'
            self.url_patterns = '"stream_url_hd": "([^"]+)', '"stream_url": "([^"]+)'
            return

        if '/tv/v/' in self.url or 'fid=' not in self.url:
            self.title_patterns = 'class="info_txt \w+"&gt;([^&lt;]+)]+&gt;\s*(?:)?\s*([^&lt;]+)'
            self.url_patterns = 'video-sources\s*=\s*".+?(?:&amp;\d+=http.+?)*&amp;\d+=(http.+?[^=])(?:&amp;\d+=)*&amp;qType=\w+"',
            return

        self.title_patterns = '<title>([^&lt;]+)&lt;/',
        self.url_patterns = r'(?:data-url|controls src)\s*=\s*[\"\']([^\"\']+)',
        html = get_content(self.url)
        url = match1(html, '"page_url": "([^"]+)')
        assert url, 'No url match'
        self.url = url
        self.l_assert()
</title>
github ForgQi / bilibiliupload / ykdl / extractors / bilibili / video.py View on Github external
def get_page_info(self):
        """Read the video id, title and uploader name from the video page.

        The page number is taken from ``self.url`` (``?p=N`` or
        ``index_N.``), defaulting to ``'1'``.  The page HTML is fetched and
        the JSON object assigned to ``__INITIAL_STATE__`` is parsed; its
        ``videoData`` entry supplies the title, the owner name and the list
        of pages.  For the page whose number matches, the page's ``cid`` is
        used as the video id and the title is extended with the page number
        and/or the page's own sub-title.

        Returns:
            A ``(vid, title, artist)`` tuple.

        Raises:
            AssertionError: if no entry in the page list matches the
                requested page number.
        """
        # Raw strings: '\?', '\d' and '\.' are not valid Python string escapes.
        page_index = match1(self.url, r'\?p=(\d+)', r'index_(\d+)\.') or '1'
        html = get_content(self.url)
        video_data = json.loads(match1(html, '__INITIAL_STATE__=({.+?});'))['videoData']
        title = video_data['title']
        artist = video_data['owner']['name']
        pages = video_data['pages']

        # Stays None when no page matches, so the failure is reported below
        # instead of surfacing as an UnboundLocalError at the return.
        vid = None
        for page in pages:
            index = str(page['page'])
            subtitle = page['part']
            if index == page_index:
                vid = page['cid']
                if len(pages) > 1:
                    title = u'{} - {} - {}'.format(title, index, subtitle)
                elif subtitle and subtitle != title:
                    title = u'{} - {}'.format(title, subtitle)
                break

        assert vid is not None, 'No page {} found in the video page list'.format(page_index)
        return vid, title, artist
github ForgQi / bilibiliupload / ykdl / extractors / douyu / live.py View on Github external
def prepare(self):
        info = VideoInfo(self.name, True)
        add_header("Referer", 'https://www.douyu.com')

        html = get_content(self.url)
        self.vid = match1(html, '\$ROOM\.room_id\s*=\s*(\d+)',
                                'room_id\s*=\s*(\d+)',
                                '"room_id.?":(\d+)',
                                'data-onlineid=(\d+)')
        title = match1(html, 'Title-headlineH2"&gt;([^&lt;]+)&lt;')
        artist = match1(html, 'Title-anchorName" title="([^"]+)"')

        if not title or not artist:
            html = get_content('https://open.douyucdn.cn/api/RoomApi/room/' + self.vid)
            room_data = json.loads(html)
            if room_data['error'] == 0:
                room_data = room_data['data']
                title = room_data['room_name']
                artist = room_data['owner_name']

        info.title = u'{} - {}'.format(title, artist)
        info.artist = artist