[yt-dlp.git] / youtube_dl / extractor / vlive.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import (
    dict_get,
    float_or_none,
    int_or_none,
)
from ..compat import compat_urllib_parse


class VLiveIE(InfoExtractor):
    """Extractor for ``vlive.tv/video/<id>`` pages.

    The video page is scraped for the arguments of its
    ``vlive.tv.video.ajax.request.handler.init(...)`` call; the third and
    fourth arguments are sent as ``videoId`` and ``key`` to the
    ``vod_play_videoInfo.json`` endpoint, whose JSON response supplies the
    formats, the captions and the view count.
    """
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Build and return the info dict for the video page at ``url``.

        The returned dict carries ``id``, ``title``, ``creator``,
        ``thumbnail``, ``view_count``, ``formats`` and ``subtitles``.
        Only the ``creator`` lookup is explicitly non-fatal; the other
        lookups use the helpers' default failure behaviour.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'http://www.vlive.tv/video/%s' % video_id, video_id)

        # Both lookups below read arguments of the same init(...) call, so
        # they share everything up to (and including) the second argument.
        init_prefix = (
            r'vlive\.tv\.video\.ajax\.request\.handler\.init\('
            r'\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*')

        # Third argument of init(...).
        long_video_id = self._search_regex(
            init_prefix + r'"([^"]+)"',
            webpage, 'long video id')

        # Fourth argument of init(...).
        key = self._search_regex(
            init_prefix + r'"[^"]+"\s*,\s*"([^"]+)"',
            webpage, 'key')

        title = self._og_search_title(webpage)

        playinfo = self._download_json(
            'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
            % compat_urllib_parse.urlencode({
                'videoId': long_video_id,
                'key': key,
                'ptc': 'http',
                'doct': 'json',  # document type (xml or json)
                'cpt': 'vtt',  # captions type (vtt or ttml)
            }), video_id)

        # ``d.get(k, {})`` only falls back when the key is absent; a JSON
        # ``null`` value would come back as None and break the chained
        # lookup.  ``or {}`` / ``or []`` covers both cases.
        formats = []
        for vid in (playinfo.get('videos') or {}).get('list') or []:
            source = vid.get('source')
            if not source:
                # Entries without a URL cannot be downloaded.
                continue
            encoding = vid.get('encodingOption') or {}
            bitrate = vid.get('bitrate') or {}
            formats.append({
                'url': source,
                'format_id': encoding.get('name'),
                'abr': float_or_none(bitrate.get('audio')),
                'vbr': float_or_none(bitrate.get('video')),
                'width': int_or_none(encoding.get('width')),
                'height': int_or_none(encoding.get('height')),
                'filesize': int_or_none(vid.get('size')),
            })
        self._sort_formats(formats)

        thumbnail = self._og_search_thumbnail(webpage)
        creator = self._html_search_regex(
            r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
            webpage, 'creator', fatal=False)

        view_count = int_or_none((playinfo.get('meta') or {}).get('count'))

        subtitles = {}
        for caption in (playinfo.get('captions') or {}).get('list') or []:
            # Use the first usable identifier among these keys as the
            # subtitle language key.
            lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
            if lang and caption.get('source'):
                subtitles[lang] = [{
                    'ext': 'vtt',
                    'url': caption['source']}]

        return {
            'id': video_id,
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
            'view_count': view_count,
            'formats': formats,
            'subtitles': subtitles,
        }
Commit	Line	Data
061f62da	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
061f62da	4	from .common import InfoExtractor
061f62da	5	from ..utils import (
52f5889f S	6	dict_get,
	7	float_or_none,
	8	int_or_none,
061f62da	9	)
	10	from ..compat import compat_urllib_parse
	11
	12
	13	class VLiveIE(InfoExtractor):
	14	IE_NAME = 'vlive'
52f5889f	15	_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
061f62da	16	_TEST = {
b8b465af	17	'url': 'http://www.vlive.tv/video/1326',
061f62da	18	'md5': 'cc7314812855ce56de70a06a27314983',
	19	'info_dict': {
	20	'id': '1326',
	21	'ext': 'mp4',
52f5889f S	22	'title': "[V] Girl's Day's Broadcast",
	23	'creator': "Girl's Day",
	24	'view_count': int,
061f62da	25	},
061f62da	26	}
061f62da	27
	28	def _real_extract(self, url):
	29	video_id = self._match_id(url)
	30
	31	webpage = self._download_webpage(
52f5889f	32	'http://www.vlive.tv/video/%s' % video_id, video_id)
061f62da	33
b8b465af	34	long_video_id = self._search_regex(
52f5889f S	35	r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
52f5889f S	36	webpage, 'long video id')
b8b465af EH	37
b8b465af EH	38	key = self._search_regex(
52f5889f S	39	r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
52f5889f S	40	webpage, 'key')
b8b465af	41
061f62da	42	title = self._og_search_title(webpage)
08354db4	43
52f5889f S	44	playinfo = self._download_json(
	45	'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
	46	% compat_urllib_parse.urlencode({
	47	'videoId': long_video_id,
	48	'key': key,
	49	'ptc': 'http',
	50	'doct': 'json', # document type (xml or json)
	51	'cpt': 'vtt', # captions type (vtt or ttml)
	52	}), video_id)
061f62da	53
52f5889f S	54	formats = [{
	55	'url': vid['source'],
	56	'format_id': vid.get('encodingOption', {}).get('name'),
	57	'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
	58	'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
	59	'width': int_or_none(vid.get('encodingOption', {}).get('width')),
	60	'height': int_or_none(vid.get('encodingOption', {}).get('height')),
	61	'filesize': int_or_none(vid.get('size')),
	62	} for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
061f62da	63	self._sort_formats(formats)
061f62da	64
52f5889f S	65	thumbnail = self._og_search_thumbnail(webpage)
	66	creator = self._html_search_regex(
	67	r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
	68	webpage, 'creator', fatal=False)
	69
	70	view_count = int_or_none(playinfo.get('meta', {}).get('count'))
	71
061f62da	72	subtitles = {}
b8b465af	73	for caption in playinfo.get('captions', {}).get('list', []):
52f5889f S	74	lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
	75	if lang and caption.get('source'):
	76	subtitles[lang] = [{
	77	'ext': 'vtt',
	78	'url': caption['source']}]
061f62da	79
	80	return {
	81	'id': video_id,
	82	'title': title,
	83	'creator': creator,
	84	'thumbnail': thumbnail,
52f5889f	85	'view_count': view_count,
061f62da	86	'formats': formats,
061f62da	87	'subtitles': subtitles,
061f62da	88	}