]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/aenetworks.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / aenetworks.py
CommitLineData
05c7feec 1from .theplatform import ThePlatformIE
d8873d4d 2from ..utils import (
4f1e02ad 3 ExtractorError,
29f7c58a 4 GeoRestrictedError,
4f1e02ad 5 int_or_none,
4823ec9f 6 remove_start,
7 traverse_obj,
d8873d4d 8 update_url_query,
29f7c58a 9 urlencode_postdata,
42362fdb
RA
10)
11
12
class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
    """Common helpers for the A+E Networks extractors.

    Provides the shared URL prefix regex, the domain lookup table, SMIL
    format extraction and the feed-based metadata lookup.
    """

    # Verbose-mode URL prefix; captures the site as the `domain` group.
    _BASE_URL_REGEX = r'''(?x)https?://
        (?:(?:www|play|watch)\.)?
        (?P<domain>
            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
            fyi\.tv
        )/'''
    # Credentials passed to _sign_url() when signing SMIL URLs.
    _THEPLATFORM_KEY = '43jXaGRQud'
    _THEPLATFORM_SECRET = 'S10BPXHMlb'
    # domain -> (requestor_id, brand), unpacked in that order by _extract_aetn_info().
    _DOMAIN_MAP = {
        'history.com': ('HISTORY', 'history'),
        'aetv.com': ('AETV', 'aetv'),
        'mylifetime.com': ('LIFETIME', 'lifetime'),
        'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
        'fyi.tv': ('FYI', 'fyi'),
        'historyvault.com': (None, 'historyvault'),
        'biography.com': (None, 'biography'),
    }

    def _extract_aen_smil(self, smil_url, video_id, auth=None):
        """Collect formats and subtitles from every SMIL variant of smil_url.

        Each variant is requested with a signed URL. A GeoRestrictedError is
        propagated immediately; any other ExtractorError is remembered and
        re-raised only if no variant produced a format.

        Returns a dict with the keys 'id', 'formats' and 'subtitles'.
        """
        shared_query = {
            'mbr': 'true',
            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
        }
        if auth:
            shared_query['auth'] = auth

        smil_variants = (
            {'assetTypes': 'high_video_ak', 'switch': 'hls_high_ak'},
            {'assetTypes': 'high_video_s3'},
            {'assetTypes': 'high_video_s3', 'switch': 'hls_high_fastly'},
        )

        formats, subtitles = [], {}
        failure = None
        for variant in smil_variants:
            # Variant keys come first, then the shared ones (same key order as before).
            params = {**variant, **shared_query}
            signed_url = self._sign_url(
                update_url_query(smil_url, params),
                self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
            note = 'Downloading %s SMIL data' % (params.get('switch') or params['assetTypes'])
            try:
                variant_formats, variant_subtitles = self._extract_theplatform_smil(
                    signed_url, video_id, note)
            except GeoRestrictedError:
                # Geo blocks are not retried with the remaining variants.
                raise
            except ExtractorError as err:
                failure = err
                continue
            formats.extend(variant_formats)
            subtitles = self._merge_subtitles(subtitles, variant_subtitles)

        if failure and not formats:
            raise failure

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_aetn_info(self, domain, filter_key, filter_value, url):
        """Look up a video in the A&E feed and build its info dict.

        The feed is filtered on filter_key == filter_value and only the first
        result is considered. Raises an expected ExtractorError when that
        result is missing or does not match.
        """
        requestor_id, brand = self._DOMAIN_MAP[domain]
        feed = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
            filter_value, query={'filter[%s]' % filter_key: filter_value})
        video = traverse_obj(
            feed,
            ('results', lambda idx, entry: idx == 0 and entry[filter_key] == filter_value),
            get_all=False)
        if not video:
            raise ExtractorError(
                'Show not found in A&E feed (too new?)', expected=True,
                video_id=remove_start(filter_value, '/'))

        title = video['title']
        video_id = video['id']
        media_url = video['publicUrl']

        theplatform_path = self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path')
        theplatform_metadata = self._download_theplatform_metadata(theplatform_path, video_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)

        auth = None
        if theplatform_metadata.get('AETN$isBehindWall'):
            # Content behind the wall needs an MVPD authorization token.
            program_id = (
                theplatform_metadata.get('AETN$PPL_pplProgramId')
                or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'))
            rating = traverse_obj(theplatform_metadata, ('ratings', 0, 'rating'))
            resource = self._get_mvpd_resource(
                requestor_id, theplatform_metadata['title'], program_id, rating)
            auth = self._extract_mvpd_auth(url, video_id, requestor_id, resource)

        info.update(self._extract_aen_smil(media_url, video_id, auth))
        # Feed values take precedence over what ThePlatform/SMIL reported.
        info.update({
            'title': title,
            'series': video.get('seriesName'),
            'season_number': int_or_none(video.get('tvSeasonNumber')),
            'episode_number': int_or_none(video.get('tvSeasonEpisodeNumber')),
        })
        return info
107
b9c7a973 108
class AENetworksIE(AENetworksBaseIE):
    """Extractor for single episodes, movies, specials and clips on A+E sites."""

    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
    # The `id` group is the canonical path without its leading slash.
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
        shows/[^/]+/season-\d+/episode-\d+|
        (?:
            (?:movie|special)s/[^/]+|
            (?:shows/[^/]+/)?videos
        )/[^/?#&]+
    )'''
    _TESTS = [{
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'info_dict': {
            'id': '22253814',
            'ext': 'mp4',
            'title': 'Winter Is Coming',
            'description': 'md5:a40e370925074260b1c8a633c632c63a',
            'timestamp': 1338306241,
            'upload_date': '20120529',
            'uploader': 'AENE-NEW',
            'duration': 2592.0,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'chapters': 'count:5',
            'tags': 'count:14',
            'categories': ['Mountain Men'],
            'episode_number': 1,
            'episode': 'Episode 1',
            'season': 'Season 1',
            'season_number': 1,
            'series': 'Mountain Men',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'Geo-restricted - This content is not available in your location.',
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
            'id': '600587331957',
            'ext': 'mp4',
            'title': 'Inlawful Entry',
            'description': 'md5:57c12115a2b384d883fe64ca50529e08',
            'timestamp': 1452634428,
            'upload_date': '20160112',
            'uploader': 'AENE-NEW',
            'duration': 1277.695,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'chapters': 'count:4',
            'tags': 'count:23',
            'episode': 'Episode 1',
            'episode_number': 1,
            'season': 'Season 9',
            'season_number': 9,
            'series': 'Duck Dynasty',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
        'only_matching': True,
    }, {
        'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
        'only_matching': True,
    }, {
        'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/videos/history-of-valentines-day',
        'only_matching': True,
    }, {
        'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the URL's canonical path through the A&E feed."""
        mobj = self._match_valid_url(url)
        domain, canonical_path = mobj.group('domain', 'id')
        # The feed stores canonical paths with a leading slash.
        return self._extract_aetn_info(domain, 'canonical', '/' + canonical_path, url)
201
202
class AENetworksListBaseIE(AENetworksBaseIE):
    """Base class for playlist extractors backed by the A+E GraphQL endpoint.

    _real_extract() reads the following from the concrete subclass:
    _RESOURCE, _FIELDS, _ITEMS_KEY, _PLAYLIST_TITLE_KEY,
    _PLAYLIST_DESCRIPTION_KEY and _get_doc(item).
    """

    def _call_api(self, resource, slug, brand, fields):
        """Query the GraphQL endpoint for `resource` identified by `slug`.

        Returns the value stored under ['data'][resource] in the JSON response.
        """
        # Local import so this block does not depend on the module header.
        import json

        # The slug comes straight from the URL and may contain '"' or '\\'.
        # Serialise it as a quoted, escaped string literal so it cannot break
        # out of the query; ordinary slugs yield the exact same document.
        slug_literal = json.dumps(slug, ensure_ascii=False)
        return self._download_json(
            'https://yoga.appsvcs.aetnd.com/graphql',
            slug, query={'brand': brand}, data=urlencode_postdata({
                'query': '''{
  %s(slug: %s) {
    %s
  }
}''' % (resource, slug_literal, fields),
            }))['data'][resource]

    def _real_extract(self, url):
        """Build a playlist whose entries are delegated to AENetworksIE."""
        domain, slug = self._match_valid_url(url).groups()
        _, brand = self._DOMAIN_MAP[domain]
        playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
        base_url = 'http://watch.%s' % domain

        entries = []
        for item in (playlist.get(self._ITEMS_KEY) or []):
            doc = self._get_doc(item)
            canonical = doc.get('canonical')
            if not canonical:
                # Items without a canonical path cannot be turned into a URL.
                continue
            entries.append(self.url_result(
                base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))

        description = None
        if self._PLAYLIST_DESCRIPTION_KEY:
            description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)

        return self.playlist_result(
            entries, playlist.get('id'),
            playlist.get(self._PLAYLIST_TITLE_KEY), description)
237
238
class AENetworksCollectionIE(AENetworksListBaseIE):
    """Playlist extractor for `list` / `collections` pages."""

    IE_NAME = 'aenetworks:collection'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'

    # Settings consumed by AENetworksListBaseIE._real_extract().
    _RESOURCE = 'list'
    _ITEMS_KEY = 'items'
    _PLAYLIST_TITLE_KEY = 'display_title'
    _PLAYLIST_DESCRIPTION_KEY = None
    _FIELDS = '''id
    display_title
    items {
      ... on ListVideoItem {
        doc {
          canonical
          id
        }
      }
    }'''

    _TESTS = [{
        'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
        'info_dict': {
            'id': '282',
            'title': 'America The Story of Us',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
        'only_matching': True,
    }, {
        'url': 'https://www.historyvault.com/collections/mysteryquest',
        'only_matching': True,
    }]

    def _get_doc(self, item):
        """Return the item's nested 'doc' mapping, or an empty dict if absent/falsy."""
        doc = item.get('doc')
        return doc if doc else {}
273
274
class AENetworksShowIE(AENetworksListBaseIE):
    """Playlist extractor for show landing pages."""

    IE_NAME = 'aenetworks:show'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'

    # Settings consumed by AENetworksListBaseIE._real_extract().
    _RESOURCE = 'series'
    _ITEMS_KEY = 'episodes'
    _PLAYLIST_TITLE_KEY = 'title'
    _PLAYLIST_DESCRIPTION_KEY = 'description'
    _FIELDS = '''description
    id
    title
    episodes {
      canonical
      id
    }'''

    _TESTS = [{
        'url': 'http://www.history.com/shows/ancient-aliens',
        'info_dict': {
            'id': 'SERIES1574',
            'title': 'Ancient Aliens',
            'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
        },
        'playlist_mincount': 150,
    }]

    def _get_doc(self, item):
        """Episodes already carry 'canonical' and 'id' directly; return the item as is."""
        return item
b9c7a973 301
b9c7a973 302
42362fdb
RA
class HistoryTopicIE(AENetworksBaseIE):
    """Redirects History.com topic video pages to the matching /videos/ URL."""

    IE_NAME = 'history:topic'
    IE_DESC = 'History.com Topic'
    _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
    _TESTS = [{
        'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
        'info_dict': {
            'id': '40700995724',
            'ext': 'mp4',
            'title': "History of Valentine’s Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
            'timestamp': 1375819729,
            'upload_date': '20130806',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Hand the topic slug (without its '-video' suffix) over to AENetworksIE."""
        video_url = 'http://www.history.com/videos/' + self._match_id(url)
        return self.url_result(video_url, AENetworksIE.ie_key())
330
331
class HistoryPlayerIE(AENetworksBaseIE):
    """Extractor for numeric /player/<id> URLs on history.com and biography.com."""

    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []

    def _real_extract(self, url):
        """Look the numeric player id up in the A&E feed by its 'id' field."""
        mobj = self._match_valid_url(url)
        domain, video_id = mobj.group('domain', 'id')
        return self._extract_aetn_info(domain, 'id', video_id, url)
340
341
class BiographyIE(AENetworksBaseIE):
    """Resolves biography.com video pages to their embedded player URL."""

    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': '404 Not Found',
    }]

    def _real_extract(self, url):
        """Find the <phoenix-iframe> player URL in the page and delegate to HistoryPlayerIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # Reuse the player extractor's URL pattern to locate the iframe source.
        iframe_src_re = r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL
        player_url = self._search_regex(iframe_src_re, webpage, 'player URL')
        return self.url_result(player_url, HistoryPlayerIE.ie_key())