]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/huffpost.py
[extractor/common] Document uploader_url
[yt-dlp.git] / youtube_dl / extractor / huffpost.py
CommitLineData
db1f3888
PH
1from __future__ import unicode_literals
2
3import re
4
5from .common import InfoExtractor
6from ..utils import (
7 parse_duration,
8 unified_strdate,
9)
10
11
class HuffPostIE(InfoExtractor):
    """Extractor for HuffPost Live segments.

    Matches both regular segment pages (``/r/segment/<slug>/<id>``) and
    embed-player URLs (``/HPLEmbedPlayer/?segmentId=<id>``); see
    ``_VALID_URL``. Metadata is read from the segment JSON API.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
            'duration': 1549,
            'upload_date': '20140124',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the segment identified by *url*.

        Downloads ``/api/segments/<id>.json`` and reads title, description,
        duration, upload date, thumbnails and the ``sources.live`` formats
        from its ``data`` object. If no formats are listed but a
        ``fivemin_id`` is present, the result delegates to the ``5min:``
        extractor instead.

        Raises ``KeyError`` if the response lacks ``data`` or ``title``.
        """
        video_id = self._match_id(url)

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data.get('running_time'))
        # ``or {}`` also covers a key that is present but null in the JSON.
        schedule = data.get('schedule') or {}
        upload_date = unified_strdate(
            schedule.get('starts_at') or data.get('segment_start_date_time'))
        description = data.get('description')

        # Thumbnails are optional metadata: tolerate a missing/null image
        # map and skip null or empty entries instead of crashing.
        thumbnails = []
        for thumbnail_url in (data.get('images') or {}).values():
            if not thumbnail_url:
                continue
            # Only keep images whose file name carries a WIDTHxHEIGHT suffix.
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumbnail_url)
            if not m:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'resolution': m.group(1),
            })

        # Distinct loop names keep the ``url`` parameter intact (list
        # comprehension variables leak into the enclosing scope on Python 2).
        live_sources = (data.get('sources') or {}).get('live') or {}
        formats = [{
            'format': key,
            'format_id': key.replace('/', '.'),
            'ext': 'mp4',
            'url': source_url,
            'vcodec': 'none' if key.startswith('audio/') else None,
        } for key, source_url in live_sources.items()]

        if not formats and data.get('fivemin_id'):
            return self.url_result('5min:%s' % data['fivemin_id'])

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }