[yt-dlp.git] / youtube_dl / extractor / udn.py

# coding: utf-8
from __future__ import unicode_literals

import json
from .common import InfoExtractor
from ..utils import js_to_json
from ..compat import compat_urlparse


class UDNEmbedIE(InfoExtractor):
    """Extractor for embedded news videos served from video.udn.com."""

    _VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video.udn.com/embed/news/300040',
        'md5': 'de06b4c90b042c128395a88f0384817e',
        'info_dict': {
            'id': '300040',
            'ext': 'mp4',
            'title': '生物老師男變女 全校挺"做自己"',
            # Raw string: ``\.`` is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': '//video.udn.com/embed/news/300040',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single embed page.

        The page is downloaded and the JavaScript object assigned to
        ``var options`` is converted to JSON and parsed.  If its ``video``
        mapping carries a non-empty ``youtube`` entry, extraction is
        delegated to the ``Youtube`` extractor.  Otherwise every remaining
        ``video`` entry is treated as ``format id -> API URL``: the API URL
        is resolved against the page URL and its response body is used as
        the media URL of that format.

        Returns a dict with ``id``, ``formats``, ``title`` and ``thumbnail``
        (``None`` when the options carry no non-empty ``gallery``), or the
        result of ``self.url_result`` for YouTube-hosted videos.
        """
        video_id = self._match_id(url)

        page = self._download_webpage(url, video_id)

        # The captured text is a JS object literal, so it has to go through
        # js_to_json before json.loads can read it.
        options = json.loads(js_to_json(self._html_search_regex(
            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))

        video_urls = options['video']

        # Remove the 'youtube' key in either case so that it is never
        # treated as a downloadable format below.
        youtube_url = video_urls.pop('youtube', None)
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        formats = []
        for video_type, api_url in video_urls.items():
            # An empty API URL would make urljoin() return the embed page
            # URL itself, and the page HTML would end up as the media URL.
            if not api_url:
                continue
            formats.append({
                'url': self._download_webpage(
                    compat_urlparse.urljoin(url, api_url), video_id,
                    'retrieve url for %s video' % video_type),
                'format_id': video_type,
                # Rank mp4 above every other format type.
                'preference': 0 if video_type == 'mp4' else -1,
            })

        self._sort_formats(formats)

        thumbnail = None

        gallery = options.get('gallery')
        if gallery:
            thumbnail = gallery[0].get('original')

        return {
            'id': video_id,
            'formats': formats,
            'title': options['title'],
            'thumbnail': thumbnail
        }
Commit	Line	Data
418c5cc3 YCH	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import json
	5	from .common import InfoExtractor
0a160363 YCH	6	from ..utils import js_to_json
0a160363 YCH	7	from ..compat import compat_urlparse
418c5cc3 YCH	8
	9
	10	class UDNEmbedIE(InfoExtractor):
	11	_VALID_URL = r'(?:https?:)?//video\.udn\.com/embed/news/(?P<id>\d+)'
	12	_TESTS = [{
	13	'url': 'http://video.udn.com/embed/news/300040',
	14	'md5': 'de06b4c90b042c128395a88f0384817e',
	15	'info_dict': {
	16	'id': '300040',
	17	'ext': 'mp4',
	18	'title': '生物老師男變女全校挺"做自己"',
	19	'thumbnail': 're:^https?://.*\.jpg$',
	20	}
	21	}, {
	22	'url': '//video.udn.com/embed/news/300040',
	23	'only_matching': True,
	24	}]
	25
	26	def _real_extract(self, url):
	27	video_id = self._match_id(url)
	28
	29	page = self._download_webpage(url, video_id)
	30
	31	options = json.loads(js_to_json(self._html_search_regex(
	32	r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))
	33
	34	video_urls = options['video']
	35
	36	if video_urls.get('youtube'):
	37	return self.url_result(video_urls.get('youtube'), 'Youtube')
	38
	39	try:
	40	del video_urls['youtube']
	41	except KeyError:
	42	pass
	43
	44	formats = [{
	45	'url': self._download_webpage(
0a160363	46	compat_urlparse.urljoin(url, api_url), video_id,
418c5cc3 YCH	47	'retrieve url for %s video' % video_type),
	48	'format_id': video_type,
	49	'preference': 0 if video_type == 'mp4' else -1,
	50	} for video_type, api_url in video_urls.items()]
	51
	52	self._sort_formats(formats)
	53
	54	thumbnail = None
	55
	56	if options.get('gallery') and len(options['gallery']):
	57	thumbnail = options['gallery'][0].get('original')
	58
	59	return {
	60	'id': video_id,
	61	'formats': formats,
	62	'title': options['title'],
	63	'thumbnail': thumbnail
	64	}