]>
Commit | Line | Data |
---|---|---|
71a1db89 S |
1 | import re |
2 | ||
758a0592 | 3 | from .common import InfoExtractor |
1c45b7a8 | 4 | from ..compat import compat_str |
758a0592 | 5 | from ..utils import ( |
6 | int_or_none, | |
7 | determine_protocol, | |
1c45b7a8 | 8 | try_get, |
111de002 | 9 | unescapeHTML, |
758a0592 | 10 | ) |
11 | ||
12 | ||
class DailyMailIE(InfoExtractor):
    """Extractor for videos hosted on dailymail.co.uk.

    Handles both regular video pages (``/video/<section>/video-<id>``) and
    embed pages (``/embed/video/<id>``), as described by ``_VALID_URL``.
    """

    _VALID_URL = r'https?://(?:www\.)?dailymail\.co\.uk/(?:video/[^/]+/video-|embed/video/)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.dailymail.co.uk/video/tvshowbiz/video-1295863/The-Mountain-appears-sparkling-water-ad-Heavy-Bubbles.html',
        'md5': 'f6129624562251f628296c3a9ffde124',
        'info_dict': {
            'id': '1295863',
            'ext': 'mp4',
            'title': 'The Mountain appears in sparkling water ad for \'Heavy Bubbles\'',
            'description': 'md5:a93d74b6da172dd5dc4d973e0b766a84',
        }
    }, {
        'url': 'http://www.dailymail.co.uk/embed/video/1295863.html',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the Daily Mail embed URLs referenced by iframes in *webpage*.

        The result is a list of the ``src`` values (one string per matching
        ``<iframe>``); it is empty when the page embeds no Daily Mail video.
        Scheme-relative URLs (``//www.dailymail.co.uk/...``) are returned
        as found, without a scheme being added.
        """
        return re.findall(
            r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//(?:www\.)?dailymail\.co\.uk/embed/video/\d+\.html)',
            webpage)

    def _real_extract(self, url):
        """Build the info dict (id, title, description, thumbnail, formats) for *url*.

        Direct key lookups (``video_data['title']``,
        ``video_sources['renditions']``) are kept as in the original, so a
        missing key still raises ``KeyError``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # The player configuration is a JSON object stored in a single-quoted
        # ``data-opts`` attribute of the page markup.
        video_data = self._parse_json(self._search_regex(
            r"data-opts='({.+?})'", webpage, 'video data'), video_id)
        title = unescapeHTML(video_data['title'])

        # Prefer a sources URL advertised by the player configuration (two
        # candidate locations are probed); otherwise fall back to the
        # per-video API endpoint.
        sources_url = (try_get(
            video_data,
            (lambda x: x['plugins']['sources']['url'],
             lambda x: x['sources']['url']), compat_str)
            or 'http://www.dailymail.co.uk/api/player/%s/video-sources.json' % video_id)

        video_sources = self._download_json(sources_url, video_id)
        # Some responses nest the actual payload under a 'body' key.
        body = video_sources.get('body')
        if body:
            video_sources = body

        formats = []
        for rendition in video_sources['renditions']:
            rendition_url = rendition.get('url')
            if not rendition_url:
                # A rendition without a URL cannot be downloaded; skip it.
                continue
            # NOTE(review): the second argument is presumably a scale divisor
            # (encodingRate / 1000) -- confirm against utils.int_or_none.
            tbr = int_or_none(rendition.get('encodingRate'), 1000)
            container = rendition.get('videoContainer')
            # The 'M2TS' container is treated as an HLS rendition here.
            is_hls = container == 'M2TS'
            protocol = 'm3u8_native' if is_hls else determine_protocol({'url': rendition_url})
            formats.append({
                'format_id': ('hls' if is_hls else protocol) + ('-%d' % tbr if tbr else ''),
                'url': rendition_url,
                'width': int_or_none(rendition.get('frameWidth')),
                'height': int_or_none(rendition.get('frameHeight')),
                'tbr': tbr,
                'vcodec': rendition.get('videoCodec'),
                'container': container,
                'protocol': protocol,
                'ext': 'mp4' if is_hls else None,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': unescapeHTML(video_data.get('descr')),
            'thumbnail': video_data.get('poster') or video_data.get('thumbnail'),
            'formats': formats,
        }