]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/huffpost.py
[cleanup] Upgrade syntax
[yt-dlp.git] / yt_dlp / extractor / huffpost.py
CommitLineData
db1f3888
PH
1import re
2
3from .common import InfoExtractor
4from ..utils import (
f0ec61b5 5 determine_ext,
db1f3888
PH
6 parse_duration,
7 unified_strdate,
8)
9
10
class HuffPostIE(InfoExtractor):
    """Extractor for segments served from (embed.)live.huffingtonpost.com.

    The segment id is taken from the URL, the segment description is fetched
    from the JSON API on embed.live.huffingtonpost.com, and the metadata,
    thumbnails and formats are built from that response.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
            'duration': 1549,
            'upload_date': '20140124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['HTTP Error 404: Not Found'],
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by ``url``.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``formats``, ``duration``, ``upload_date`` and ``thumbnails``.
        ``title`` is mandatory (a missing title raises ``KeyError``); every
        other metadata field is read defensively and may end up empty/None.
        """
        video_id = self._match_id(url)

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data.get('running_time'))
        # `or {}` (rather than a .get default) also covers an explicit JSON null.
        schedule = data.get('schedule') or {}
        upload_date = unified_strdate(
            schedule.get('starts_at') or data.get('segment_start_date_time'))
        description = data.get('description')

        # Thumbnails are optional: tolerate a missing or null 'images' mapping.
        # Only image URLs that carry a "-<width>x<height>." marker are kept,
        # and that marker is reported as the thumbnail resolution.
        thumbnails = []
        for image_url in filter(None, (data.get('images') or {}).values()):
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', image_url)
            if not m:
                continue
            thumbnails.append({
                'url': image_url,
                'resolution': m.group(1),
            })

        formats = []
        sources = data.get('sources') or {}
        # Entries from 'live' come first, followed by those from 'live_again'.
        live_sources = (
            list((sources.get('live') or {}).items())
            + list((sources.get('live_again') or {}).items()))
        for key, format_url in live_sources:
            if not format_url:
                # A source entry without a URL cannot yield a usable format.
                continue
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                # The manifest is requested with an explicit hdcore query parameter.
                formats.extend(self._extract_f4m_formats(
                    format_url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
            else:
                # Direct link: the source key doubles as the format name, and
                # keys starting with 'audio/' are flagged as having no video.
                formats.append({
                    'format': key,
                    'format_id': key.replace('/', '.'),
                    'ext': 'mp4',
                    'url': format_url,
                    'vcodec': 'none' if key.startswith('audio/') else None,
                })

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }