]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/huffpost.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / yt_dlp / extractor / huffpost.py
CommitLineData
db1f3888
PH
1from __future__ import unicode_literals
2
3import re
4
5from .common import InfoExtractor
6from ..utils import (
f0ec61b5 7 determine_ext,
db1f3888
PH
8 parse_duration,
9 unified_strdate,
10)
11
12
class HuffPostIE(InfoExtractor):
    """Extractor for segments hosted on live.huffingtonpost.com.

    Matches both the regular segment pages (``/r/segment/<slug>/<id>``) and
    the embed player URLs (``/HPLEmbedPlayer/?segmentId=<id>``), then reads
    the segment metadata from the site's JSON API.
    """

    IE_DESC = 'Huffington Post'
    _VALID_URL = r'''(?x)
        https?://(embed\.)?live\.huffingtonpost\.com/
        (?:
            r/segment/[^/]+/|
            HPLEmbedPlayer/\?segmentId=
        )
        (?P<id>[0-9a-f]+)'''

    _TEST = {
        'url': 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677',
        'md5': '55f5e8981c1c80a64706a44b74833de8',
        'info_dict': {
            'id': '52dd3e4b02a7602131000677',
            'ext': 'mp4',
            'title': 'Legalese It! with @MikeSacksHP',
            'description': 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG\'s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC\'s delay of Kenya\'s President and more. ',
            'duration': 1549,
            'upload_date': '20140124',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'expected_warnings': ['HTTP Error 404: Not Found'],
    }

    def _real_extract(self, url):
        """Build the info dict for the segment referenced by *url*.

        Returns either a regular info dict (id, title, description, formats,
        duration, upload_date, thumbnails) or, when no format could be
        collected and the API reports a ``fivemin_id``, a ``url_result``
        pointing at ``5min:<fivemin_id>``.

        Only ``data`` and ``title`` are treated as mandatory in the API
        response; every other field is read defensively so that a missing
        or JSON ``null`` value does not abort the extraction.
        """
        video_id = self._match_id(url)

        api_url = 'http://embed.live.huffingtonpost.com/api/segments/%s.json' % video_id
        data = self._download_json(api_url, video_id)['data']

        video_title = data['title']
        duration = parse_duration(data.get('running_time'))
        # `or {}` (rather than a .get default) also covers keys that are
        # present but null in the JSON response.
        schedule = data.get('schedule') or {}
        upload_date = unified_strdate(
            schedule.get('starts_at') or data.get('segment_start_date_time'))
        description = data.get('description')

        thumbnails = []
        images = data.get('images') or {}
        for thumbnail_url in filter(None, images.values()):
            # Only keep images whose file name carries a WIDTHxHEIGHT suffix,
            # which is reused as the thumbnail resolution.
            m = re.match(r'.*-([0-9]+x[0-9]+)\.', thumbnail_url)
            if not m:
                continue
            thumbnails.append({
                'url': thumbnail_url,
                'resolution': m.group(1),
            })

        formats = []
        sources = data.get('sources') or {}
        live_sources = (
            list((sources.get('live') or {}).items())
            + list((sources.get('live_again') or {}).items()))
        for key, format_url in live_sources:
            ext = determine_ext(format_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    format_url + '?hdcore=2.9.5', video_id, f4m_id='hds', fatal=False))
            else:
                # Direct source: the key doubles as the format label, and
                # keys starting with 'audio/' are marked as having no video.
                formats.append({
                    'format': key,
                    'format_id': key.replace('/', '.'),
                    'ext': 'mp4',
                    'url': format_url,
                    'vcodec': 'none' if key.startswith('audio/') else None,
                })

        # Nothing playable from the live sources: hand over to the 5min
        # extractor when the API provides an id for it.
        if not formats and data.get('fivemin_id'):
            return self.url_result('5min:%s' % data['fivemin_id'])

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'description': description,
            'formats': formats,
            'duration': duration,
            'upload_date': upload_date,
            'thumbnails': thumbnails,
        }