]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/huffpost.py
[nfl] Fix test case - download, but don't check md5
[yt-dlp.git] / youtube_dl / extractor / huffpost.py
CommitLineData
db1f3888
PH
1from __future__ import unicode_literals
2
3import re
4
5from .common import InfoExtractor
6from ..utils import (
7 parse_duration,
8 unified_strdate,
9)
10
11
class HuffPostIE(InfoExtractor):
    """Information extractor for Huffington Post Live segments.

    Matches segment pages and ``HPLEmbedPlayer`` embed URLs on
    ``live.huffingtonpost.com`` (optionally with the ``embed.`` prefix) and
    takes the hexadecimal segment id from the URL.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
            'duration': 1549,
            'upload_date': '20140124',
        }
    }

    def _real_extract(self, url):
        """Download the segment's JSON description and build the info dict.

        The segment id is parsed out of ``url`` with ``_VALID_URL`` and used
        to query the ``api/segments/<id>.json`` endpoint.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``formats``, ``duration``, ``upload_date`` and ``thumbnails``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data['running_time'])
        upload_date = unified_strdate(data['schedule']['starts_at'])
        description = data.get('description')

        # Thumbnails are optional metadata: tolerate a missing mapping and
        # null/empty entries instead of failing the whole extraction.
        thumbnails = []
        for image_url in (data.get('images') or {}).values():
            if not image_url:
                continue
            # Only images whose file name carries a "-<W>x<H>." marker are kept.
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        # A segment without live sources may still be reachable through the
        # fivemin fallback below, so do not raise KeyError here.
        live_sources = (data.get('sources') or {}).get('live') or {}
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': source_url,
            # Keys starting with "audio/" are flagged as having no video codec.
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, source_url in live_sources.items()]

        fid = data.get('fivemin_id')
        if fid:
            # Path layout: /2/<last three digits of fcat>/<fcat>/<id>.mp4
            # where fcat = id // 100 + 1.  %-formatting keeps this working
            # whether the id arrives as a string or as a number.
            fcat = str(int(fid) // 100 + 1)
            furl = 'http://avideos.5min.com/2/%s/%s/%s.mp4' % (fcat[-3:], fcat, fid)
            formats.append({
                'format': 'fivemin',
                'url': furl,
                'preference': 1,
            })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }