]>
Commit | Line | Data |
---|---|---|
418c5cc3 YCH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import json | |
5 | from .common import InfoExtractor | |
d0eb724e YCH |
6 | from ..utils import ( |
7 | js_to_json, | |
8 | ExtractorError, | |
9 | ) | |
0a160363 | 10 | from ..compat import compat_urlparse |
418c5cc3 YCH |
11 | |
12 | ||
class UDNEmbedIE(InfoExtractor):
    """Extractor for video.udn.com ``embed`` and ``play`` news video pages."""

    IE_DESC = '聯合影音'
    _VALID_URL = r'https?://video\.udn\.com/(?:embed|play)/news/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://video.udn.com/embed/news/300040',
        'md5': 'de06b4c90b042c128395a88f0384817e',
        'info_dict': {
            'id': '300040',
            'ext': 'mp4',
            'title': '生物老師男變女 全校挺"做自己"',
            # Raw string: '\.' is an invalid escape sequence in a normal
            # string literal and triggers a warning on modern Python.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }, {
        'url': 'https://video.udn.com/embed/news/300040',
        'only_matching': True,
    }, {
        # From https://video.udn.com/news/303776
        'url': 'https://video.udn.com/play/news/303776',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        The page embeds a JavaScript ``options`` object; its ``video`` entry
        maps a format name to a URL which, once downloaded, yields the actual
        media URL for that format.

        Returns a ``url_result`` delegating to the Youtube extractor when the
        ``video`` entry has a non-empty ``youtube`` value, otherwise a dict
        with ``id``, ``formats``, ``title`` and ``thumbnail``.

        Raises ExtractorError (expected) when no usable format is listed.
        """
        video_id = self._match_id(url)

        page = self._download_webpage(url, video_id)

        # The options object is a JS literal, so convert it to strict JSON
        # before parsing.
        options = json.loads(js_to_json(self._html_search_regex(
            r'var options\s*=\s*([^;]+);', page, 'video urls dictionary')))

        video_urls = options['video']

        youtube_url = video_urls.get('youtube')
        if youtube_url:
            return self.url_result(youtube_url, 'Youtube')

        # An empty 'youtube' entry (like any other empty entry) is skipped by
        # the ``if api_url`` filter, so no explicit removal is needed.
        formats = [{
            # api_url may be relative; resolve it against the page URL. The
            # response body of that request is used as the media URL.
            'url': self._download_webpage(
                compat_urlparse.urljoin(url, api_url), video_id,
                'retrieve url for %s video' % video_type),
            'format_id': video_type,
            'preference': 0 if video_type == 'mp4' else -1,
        } for video_type, api_url in video_urls.items() if api_url]

        if not formats:
            raise ExtractorError('No videos found', expected=True)

        self._sort_formats(formats)

        # Use the first gallery entry's 'original' image, when there is one.
        gallery = options.get('gallery')
        thumbnail = gallery[0].get('original') if gallery else None

        return {
            'id': video_id,
            'formats': formats,
            'title': options['title'],
            'thumbnail': thumbnail
        }