]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/huffpost.py
[ign] Modernize
[yt-dlp.git] / youtube_dl / extractor / huffpost.py
CommitLineData
db1f3888
PH
1from __future__ import unicode_literals
2
3import re
4
5from .common import InfoExtractor
6from ..utils import (
7 parse_duration,
8 unified_strdate,
9)
10
11
class HuffPostIE(InfoExtractor):
    """Information extractor for Huffington Post Live segments.

    Matches both the regular segment pages and the embed player URLs of
    live.huffingtonpost.com, and resolves them through the JSON segment
    API into a title, description, duration, upload date, thumbnails and
    a list of downloadable formats.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'file': '52dd3e4b02a7602131000677.mp4',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
            'duration': 1549,
            'upload_date': '20140124',
        }
    }

    def _real_extract(self, url):
        """Extract the metadata and formats for the segment at *url*.

        The segment id is taken from the URL (the ``id`` group of
        ``_VALID_URL``) and used to query the segment JSON API; everything
        returned is read from the ``data`` object of that response.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``formats``, ``duration``, ``upload_date`` and ``thumbnails``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data['running_time'])
        upload_date = unified_strdate(data['schedule']['starts_at'])
        description = data.get('description')

        # Thumbnails are optional metadata: tolerate a missing or empty
        # 'images' mapping and skip null entries instead of failing the
        # whole extraction inside re.match().
        thumbnails = []
        for image_url in (data.get('images') or {}).values():
            if not image_url:
                continue
            # Only images whose file name carries a "-<W>x<H>." marker are
            # kept; that marker is reported as the thumbnail resolution.
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        # One format per entry of the 'live' sources mapping. Keys starting
        # with 'audio/' are flagged as having no video stream.
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': source_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, source_url in data['sources']['live'].items()]

        fivemin_id = data.get('fivemin_id')
        if fivemin_id:
            # Normalise to text so the URL can be built whether the API
            # delivers the id as a JSON string or as a JSON number.
            fid = '%s' % fivemin_id
            # The URL embeds a category derived from the id
            # (id // 100 + 1) and, before it, that category's last three
            # characters.
            fcat = str(int(fid) // 100 + 1)
            furl = 'http://avideos.5min.com/2/' + fcat[-3:] + '/' + fcat + '/' + fid + '.mp4'
            formats.append({
                'format': 'fivemin',
                'url': furl,
                'preference': 1,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }