]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/aenetworks.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / aenetworks.py
1 from .theplatform import ThePlatformIE
2 from ..utils import (
3 ExtractorError,
4 GeoRestrictedError,
5 int_or_none,
6 remove_start,
7 traverse_obj,
8 update_url_query,
9 urlencode_postdata,
10 )
11
12
class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
    """Shared pieces for the A+E Networks extractors.

    Provides the common URL prefix regex, the per-domain lookup table and the
    helpers that turn an A&E feed entry into an info dict.
    """

    _BASE_URL_REGEX = r'''(?x)https?://
        (?:(?:www|play|watch)\.)?
        (?P<domain>
            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
            fyi\.tv
        )/'''
    _THEPLATFORM_KEY = '43jXaGRQud'
    _THEPLATFORM_SECRET = 'S10BPXHMlb'
    # domain -> (requestor id passed to the MVPD helpers, brand used in the feed URL)
    _DOMAIN_MAP = {
        'history.com': ('HISTORY', 'history'),
        'aetv.com': ('AETV', 'aetv'),
        'mylifetime.com': ('LIFETIME', 'lifetime'),
        'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
        'fyi.tv': ('FYI', 'fyi'),
        'historyvault.com': (None, 'historyvault'),
        'biography.com': (None, 'biography'),
    }

    def _extract_aen_smil(self, smil_url, video_id, auth=None):
        """Gather formats and subtitles from every SMIL variant of smil_url.

        Each variant is requested with its own assetTypes/switch parameters
        merged with the common query (plus the auth token, when given), and
        the resulting URL is signed before download. A variant that fails
        with ExtractorError is skipped, while GeoRestrictedError propagates
        immediately. If no formats were collected and at least one variant
        failed, the last recorded error is raised.

        Returns a dict with 'id', 'formats' and 'subtitles'.
        """
        common_query = {
            'mbr': 'true',
            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
        }
        if auth:
            common_query['auth'] = auth
        variants = (
            {'assetTypes': 'high_video_ak', 'switch': 'hls_high_ak'},
            {'assetTypes': 'high_video_s3'},
            {'assetTypes': 'high_video_s3', 'switch': 'hls_high_fastly'},
        )

        formats, subtitles, last_error = [], {}, None
        for variant in variants:
            # Variant keys come first, then the common ones, as in the request URL
            params = {**variant, **common_query}
            label = variant.get('switch') or variant['assetTypes']
            signed_url = self._sign_url(
                update_url_query(smil_url, params),
                self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
            try:
                variant_formats, variant_subtitles = self._extract_theplatform_smil(
                    signed_url, video_id, f'Downloading {label} SMIL data')
            except GeoRestrictedError:
                # Never swallow a geo restriction; trying other variants is pointless
                raise
            except ExtractorError as e:
                last_error = e
                continue
            formats.extend(variant_formats)
            subtitles = self._merge_subtitles(subtitles, variant_subtitles)

        if not formats and last_error:
            raise last_error
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_aetn_info(self, domain, filter_key, filter_value, url):
        """Look up a single video in the A&E feed and build its info dict.

        The feed of the brand mapped to domain is queried with
        filter[filter_key]=filter_value; only the first result is accepted,
        and only if its filter_key field equals filter_value. When the
        ThePlatform metadata flags the video as behind a wall, an MVPD auth
        token is obtained and forwarded to the SMIL requests.

        Raises ExtractorError (expected) when the feed has no matching entry.
        """
        requestor_id, brand = self._DOMAIN_MAP[domain]
        feed = self._download_json(
            f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
            filter_value, query={f'filter[{filter_key}]': filter_value})

        def is_wanted(index, entry):
            return index == 0 and entry[filter_key] == filter_value

        video = traverse_obj(feed, ('results', is_wanted), get_all=False)
        if not video:
            raise ExtractorError(
                'Show not found in A&E feed (too new?)', expected=True,
                video_id=remove_start(filter_value, '/'))

        title, video_id, media_url = video['title'], video['id'], video['publicUrl']
        tp_path = self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path')
        tp_metadata = self._download_theplatform_metadata(tp_path, video_id)
        info = self._parse_theplatform_metadata(tp_metadata)

        auth = None
        if tp_metadata.get('AETN$isBehindWall'):
            program_id = (
                tp_metadata.get('AETN$PPL_pplProgramId')
                or tp_metadata.get('AETN$PPL_pplProgramId_OLD'))
            rating = traverse_obj(tp_metadata, ('ratings', 0, 'rating'))
            resource = self._get_mvpd_resource(
                requestor_id, tp_metadata['title'], program_id, rating)
            auth = self._extract_mvpd_auth(url, video_id, requestor_id, resource)

        # SMIL data is applied first, then the feed-derived fields override it
        info.update(
            self._extract_aen_smil(media_url, video_id, auth),
            title=title,
            series=video.get('seriesName'),
            season_number=int_or_none(video.get('tvSeasonNumber')),
            episode_number=int_or_none(video.get('tvSeasonEpisodeNumber')),
        )
        return info
107
108
class AENetworksIE(AENetworksBaseIE):
    """Extractor for single videos: episodes, movies, specials and clips."""

    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
        shows/[^/]+/season-\d+/episode-\d+|
        (?:
            (?:movie|special)s/[^/]+|
            (?:shows/[^/]+/)?videos
        )/[^/?#&]+
    )'''
    _TESTS = [{
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'info_dict': {
            'id': '22253814',
            'ext': 'mp4',
            'title': 'Winter Is Coming',
            'description': 'md5:a40e370925074260b1c8a633c632c63a',
            'timestamp': 1338306241,
            'upload_date': '20120529',
            'uploader': 'AENE-NEW',
            'duration': 2592.0,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'chapters': 'count:5',
            'tags': 'count:14',
            'categories': ['Mountain Men'],
            'episode_number': 1,
            'episode': 'Episode 1',
            'season': 'Season 1',
            'season_number': 1,
            'series': 'Mountain Men',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'Geo-restricted - This content is not available in your location.',
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
            'id': '600587331957',
            'ext': 'mp4',
            'title': 'Inlawful Entry',
            'description': 'md5:57c12115a2b384d883fe64ca50529e08',
            'timestamp': 1452634428,
            'upload_date': '20160112',
            'uploader': 'AENE-NEW',
            'duration': 1277.695,
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'chapters': 'count:4',
            'tags': 'count:23',
            'episode': 'Episode 1',
            'episode_number': 1,
            'season': 'Season 9',
            'season_number': 9,
            'series': 'Duck Dynasty',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
        'only_matching': True,
    }, {
        'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
        'only_matching': True,
    }, {
        'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/videos/history-of-valentines-day',
        'only_matching': True,
    }, {
        'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the URL path as the feed's 'canonical' value and extract it."""
        mobj = self._match_valid_url(url)
        # The feed stores canonical paths with a leading slash
        canonical_path = '/' + mobj.group('id')
        return self._extract_aetn_info(
            mobj.group('domain'), 'canonical', canonical_path, url)
201
202
class AENetworksListBaseIE(AENetworksBaseIE):
    """Base for the playlist extractors backed by the A&E GraphQL endpoint.

    Subclasses are expected to define _RESOURCE, _FIELDS, _ITEMS_KEY,
    _PLAYLIST_TITLE_KEY, _PLAYLIST_DESCRIPTION_KEY and _get_doc(item).
    """

    def _call_api(self, resource, slug, brand, fields):
        """Query the GraphQL endpoint for resource(slug) and return its payload.

        resource, slug and fields are interpolated verbatim into the query
        text; brand is sent as a URL query parameter.

        Returns the object found under data -> resource, or None when the
        response carries no data for it (e.g. 'data' is null or the resource
        itself is null), instead of failing on the missing key.
        """
        response = self._download_json(
            'https://yoga.appsvcs.aetnd.com/graphql',
            slug, query={'brand': brand}, data=urlencode_postdata({
                'query': '''{
  %s(slug: "%s") {
    %s
  }
}''' % (resource, slug, fields),  # noqa: UP031
            }))
        # 'data' may be absent or null when the server reports errors
        return (response.get('data') or {}).get(resource)

    def _real_extract(self, url):
        """Build a playlist of AENetworksIE URL results from the API payload.

        Raises ExtractorError (expected) when the API returns nothing for the
        requested slug, rather than crashing on a None payload.
        """
        domain, slug = self._match_valid_url(url).groups()
        _, brand = self._DOMAIN_MAP[domain]
        playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
        if playlist is None:
            raise ExtractorError(
                f'No {self._RESOURCE} data returned for "{slug}"',
                expected=True, video_id=slug)
        base_url = f'http://watch.{domain}'

        entries = []
        for item in (playlist.get(self._ITEMS_KEY) or []):
            doc = self._get_doc(item)
            canonical = doc.get('canonical')
            # Items without a canonical path cannot be turned into a video URL
            if not canonical:
                continue
            entries.append(self.url_result(
                base_url + canonical, AENetworksIE.ie_key(), doc.get('id')))

        description = None
        if self._PLAYLIST_DESCRIPTION_KEY:
            description = playlist.get(self._PLAYLIST_DESCRIPTION_KEY)

        return self.playlist_result(
            entries, playlist.get('id'),
            playlist.get(self._PLAYLIST_TITLE_KEY), description)
237
238
class AENetworksCollectionIE(AENetworksListBaseIE):
    """Playlist extractor for 'list' and 'collections' pages."""

    IE_NAME = 'aenetworks:collection'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'

    # GraphQL configuration consumed by AENetworksListBaseIE
    _RESOURCE = 'list'
    _ITEMS_KEY = 'items'
    _PLAYLIST_TITLE_KEY = 'display_title'
    _PLAYLIST_DESCRIPTION_KEY = None
    _FIELDS = '''id
    display_title
    items {
      ... on ListVideoItem {
        doc {
          canonical
          id
        }
      }
    }'''

    _TESTS = [{
        'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
        'info_dict': {
            'id': '282',
            'title': 'America The Story of Us',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
        'only_matching': True,
    }, {
        'url': 'https://www.historyvault.com/collections/mysteryquest',
        'only_matching': True,
    }]

    def _get_doc(self, item):
        """Return the item's nested 'doc' object, or an empty dict if it has none."""
        doc = item.get('doc')
        return doc if doc else {}
273
274
class AENetworksShowIE(AENetworksListBaseIE):
    """Playlist extractor for a show's landing page (all of its episodes)."""

    IE_NAME = 'aenetworks:show'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'

    # GraphQL configuration consumed by AENetworksListBaseIE
    _RESOURCE = 'series'
    _ITEMS_KEY = 'episodes'
    _PLAYLIST_TITLE_KEY = 'title'
    _PLAYLIST_DESCRIPTION_KEY = 'description'
    _FIELDS = '''description
    id
    title
    episodes {
      canonical
      id
    }'''

    _TESTS = [{
        'url': 'http://www.history.com/shows/ancient-aliens',
        'info_dict': {
            'id': 'SERIES1574',
            'title': 'Ancient Aliens',
            'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
        },
        'playlist_mincount': 150,
    }]

    def _get_doc(self, item):
        """Episodes already carry 'canonical' and 'id' themselves, so return the item as is."""
        return item
301
302
class HistoryTopicIE(AENetworksBaseIE):
    """Redirects history.com topic video pages to the matching /videos/ URL."""

    IE_NAME = 'history:topic'
    IE_DESC = 'History.com Topic'
    _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
    _TESTS = [{
        'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
        'info_dict': {
            'id': '40700995724',
            'ext': 'mp4',
            'title': 'History of Valentine’s Day',
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
            'timestamp': 1375819729,
            'upload_date': '20130806',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Hand the slug (URL id without the '-video' suffix) over to AENetworksIE."""
        slug = self._match_id(url)
        video_url = f'http://www.history.com/videos/{slug}'
        return self.url_result(video_url, AENetworksIE.ie_key())
330
331
class HistoryPlayerIE(AENetworksBaseIE):
    """Extractor for history.com / biography.com player URLs with a numeric id."""

    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []

    def _real_extract(self, url):
        """Look the video up in the feed by its 'id' field."""
        mobj = self._match_valid_url(url)
        return self._extract_aetn_info(
            mobj.group('domain'), 'id', mobj.group('id'), url)
340
341
class BiographyIE(AENetworksBaseIE):
    """Extractor for biography.com video pages that embed a player iframe."""

    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': '404 Not Found',
    }]

    def _real_extract(self, url):
        """Find the embedded player URL in the page and delegate to HistoryPlayerIE."""
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        # The first capture group spans the whole player URL
        iframe_re = rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})'
        return self.url_result(
            self._search_regex(iframe_re, page, 'player URL'),
            HistoryPlayerIE.ie_key())
367 rf'<phoenix-iframe[^>]+src="({HistoryPlayerIE._VALID_URL})',
368 webpage, 'player URL')
369 return self.url_result(player_url, HistoryPlayerIE.ie_key())