]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/huffpost.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / huffpost.py
CommitLineData
db1f3888
PH
1import re
2
3from .common import InfoExtractor
4from ..utils import (
f0ec61b5 5 determine_ext,
db1f3888
PH
6 parse_duration,
7 unified_strdate,
8)
9
10
class HuffPostIE(InfoExtractor):
    # Extractor for HuffPost Live segments, addressed either through the
    # regular segment page or through the embeddable player URL.
    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''
    # Matches <iframe> embeds pointing at the embed.live.huffingtonpost.com player
    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>https?://embed\.live\.huffingtonpost\.com/.+?)\1']

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
            'duration': 1549,
            'upload_date': '20140124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['HTTP Error 404: Not Found'],
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by *url*.

        The segment id is taken from the URL, the segment JSON is fetched
        from the embed API, and title, description, duration, upload date,
        thumbnails and formats are read from its ``data`` object.

        Only ``data`` and ``data['title']`` are treated as mandatory; every
        other field is optional and may be absent or null without aborting
        the extraction.
        """
        video_id = self._match_id(url)

        api_url = f'http://embed.live.huffingtonpost.com/api/segments/{video_id}.json'
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data.get('running_time'))
        # `or {}` (rather than a .get() default) also covers an explicit
        # JSON null, which .get('schedule', {}) would pass through as None.
        schedule = data.get('schedule') or {}
        upload_date = unified_strdate(
            schedule.get('starts_at') or data.get('segment_start_date_time'))
        description = data.get('description')

        # Thumbnails are optional metadata: a missing/null 'images' object
        # simply yields no thumbnails.
        thumbnails = []
        for thumbnail_url in filter(None, (data.get('images') or {}).values()):
            # Only keep images whose file name carries a WIDTHxHEIGHT suffix
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumbnail_url)
            if not m:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'resolution': m.group(1),
            })

        formats = []
        sources = data.get('sources') or {}
        # 'live' entries first, then 'live_again', preserving the original order
        live_sources = [
            *(sources.get('live') or {}).items(),
            *(sources.get('live_again') or {}).items(),
        ]
        for key, source_url in live_sources:
            if not source_url:
                # An entry without a URL cannot produce a usable format
                continue
            ext = determine_ext(source_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    source_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    source_url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
            else:
                # Direct source: the key is used as the format label and,
                # with '/' replaced by '.', as the format id.
                formats.append({
                    'format': key,
                    'format_id': key.replace('/', '.'),
                    'ext': 'mp4',
                    'url': source_url,
                    'vcodec': 'none' if key.startswith('audio/') else None,
                })

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }