]>
Commit | Line | Data |
---|---|---|
db1f3888 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | parse_duration, | |
8 | unified_strdate, | |
9 | ) | |
10 | ||
11 | ||
class HuffPostIE(InfoExtractor):
    """Extractor for HuffPost Live segment pages and embed-player URLs.

    The segment id is taken from the URL and used to query the JSON
    segments API on embed.live.huffingtonpost.com.
    """

    # Human-readable extractor description.
    IE_DESC = 'Huffington Post'

    # Verbose-mode regex: matches both the regular segment page
    # (/r/segment/<slug>/<id>) and the embed player
    # (/HPLEmbedPlayer/?segmentId=<id>); the hex segment id is captured
    # in the named group "id".
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    # NOTE(review): 'md5', 'title' and 'description' below are still
    # 'TODO' placeholders and need to be replaced with real values.
    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': 'TODO',
        'info_dict': {
            'title': 'TODO',
            'description': 'TODO',
            'duration': 1549,
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by *url*.

        Downloads the segment's JSON description and returns a dict with
        the keys 'id', 'title', 'formats', 'duration', 'upload_date' and
        'thumbnails'.

        Raises KeyError if the API response lacks one of the fields that
        are read unconditionally ('data', 'title', 'running_time',
        'schedule', 'images', 'sources').
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data['running_time'])
        upload_date = unified_strdate(data['schedule']['started_at'])

        # Only images whose file name ends in "-<width>x<height>.<ext>"
        # are kept; that size suffix is reported as the resolution.
        thumbnails = []
        for image_url in data['images'].values():
            # re.match() raises TypeError on non-string input, so skip
            # empty/null entries instead of aborting the extraction.
            if not image_url:
                continue
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        # One format per entry of data['sources']['live']. The key is
        # used verbatim as the format description and, with "/" replaced
        # by ".", as the format id. Keys starting with "audio/" are
        # flagged as having no video codec.
        # Distinct loop names keep the ``url`` argument from being
        # rebound (comprehension variables leak on Python 2).
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': format_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, format_url in data['sources']['live'].items()]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }