]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/aenetworks.py
[cleanup] Update extractor tests (#7718)
[yt-dlp.git] / yt_dlp / extractor / aenetworks.py
1 from .theplatform import ThePlatformIE
2 from ..utils import (
3 ExtractorError,
4 GeoRestrictedError,
5 int_or_none,
6 remove_start,
7 traverse_obj,
8 update_url_query,
9 urlencode_postdata,
10 )
11
12
class AENetworksBaseIE(ThePlatformIE):  # XXX: Do not subclass from concrete IE
    """Common plumbing shared by the A+E Networks extractors.

    Holds the shared URL prefix regex, the ThePlatform signing credentials,
    the domain lookup table and the helpers that turn a feed entry into an
    info dict.
    """

    _BASE_URL_REGEX = r'''(?x)https?://
        (?:(?:www|play|watch)\.)?
        (?P<domain>
            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
            fyi\.tv
        )/'''
    _THEPLATFORM_KEY = '43jXaGRQud'
    _THEPLATFORM_SECRET = 'S10BPXHMlb'
    # domain -> (requestor id handed to the MVPD helpers, brand used in API URLs)
    _DOMAIN_MAP = {
        'history.com': ('HISTORY', 'history'),
        'aetv.com': ('AETV', 'aetv'),
        'mylifetime.com': ('LIFETIME', 'lifetime'),
        'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
        'fyi.tv': ('FYI', 'fyi'),
        'historyvault.com': (None, 'historyvault'),
        'biography.com': (None, 'biography'),
    }

    def _extract_aen_smil(self, smil_url, video_id, auth=None):
        """Collect formats and subtitles from every SMIL variant.

        Each variant query is combined with the common parameters (plus
        ``auth`` when given), the resulting URL is signed with the class
        credentials and parsed through ``_extract_theplatform_smil``.

        A ``GeoRestrictedError`` propagates immediately. Any other
        ``ExtractorError`` is remembered and re-raised only if no variant
        yielded a single format.

        Returns a dict with the keys ``id``, ``formats`` and ``subtitles``.
        """
        common_query = {
            'mbr': 'true',
            'formats': 'M3U+none,MPEG-DASH+none,MPEG4,MP3',
        }
        if auth:
            common_query['auth'] = auth
        smil_variants = (
            {'assetTypes': 'high_video_ak', 'switch': 'hls_high_ak'},
            {'assetTypes': 'high_video_s3'},
            {'assetTypes': 'high_video_s3', 'switch': 'hls_high_fastly'},
        )

        formats = []
        subtitles = {}
        last_error = None
        for variant in smil_variants:
            # Variant-specific keys come first, the common ones follow
            smil_query = {**variant, **common_query}
            signed_url = self._sign_url(
                update_url_query(smil_url, smil_query),
                self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
            note = 'Downloading %s SMIL data' % (
                smil_query.get('switch') or smil_query['assetTypes'])
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    signed_url, video_id, note)
            except GeoRestrictedError:
                # Trying further variants cannot help, so fail right away
                raise
            except ExtractorError as e:
                last_error = e
                continue
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)

        if last_error and not formats:
            raise last_error
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_aetn_info(self, domain, filter_key, filter_value, url):
        """Build the info dict for the feed entry selected by a filter.

        Queries the brand's video feed with ``filter[<filter_key>]`` set to
        ``filter_value`` and accepts only the first result, and only when its
        ``filter_key`` field equals ``filter_value``. ThePlatform metadata is
        then fetched for the entry's ``publicUrl``. When that metadata flags
        the video as behind a wall, an MVPD auth token is obtained and passed
        on to the SMIL extraction.

        ``domain`` must be a key of ``_DOMAIN_MAP``; ``url`` is forwarded to
        ``_extract_mvpd_auth``.

        Raises an expected ``ExtractorError`` when the feed holds no matching
        entry.
        """
        requestor_id, brand = self._DOMAIN_MAP[domain]
        feed = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
            filter_value, query={'filter[%s]' % filter_key: filter_value})
        # Only index 0 is considered, and it still has to match the filter
        result = traverse_obj(
            feed, ('results',
                   lambda k, v: k == 0 and v[filter_key] == filter_value),
            get_all=False)
        if not result:
            raise ExtractorError('Show not found in A&E feed (too new?)', expected=True,
                                 video_id=remove_start(filter_value, '/'))

        title = result['title']
        video_id = result['id']
        media_url = result['publicUrl']
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)

        auth = None
        if theplatform_metadata.get('AETN$isBehindWall'):
            program_id = (
                theplatform_metadata.get('AETN$PPL_pplProgramId')
                or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'))
            # Look the rating up safely: metadata without a (non-empty)
            # 'ratings' list yields None instead of KeyError/IndexError.
            rating = traverse_obj(theplatform_metadata, ('ratings', 0, 'rating'))
            resource = self._get_mvpd_resource(
                requestor_id, theplatform_metadata['title'], program_id, rating)
            auth = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)

        info.update(self._extract_aen_smil(media_url, video_id, auth))
        # Feed values take precedence over whatever ThePlatform reported
        info.update({
            'title': title,
            'series': result.get('seriesName'),
            'season_number': int_or_none(result.get('tvSeasonNumber')),
            'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
        })
        return info
107
108
class AENetworksIE(AENetworksBaseIE):
    """Extractor for single episodes, movies, specials and clips."""

    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
    # The (?x) flag set by the base regex covers this continuation as well
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
        shows/[^/]+/season-\d+/episode-\d+|
        (?:
            (?:movie|special)s/[^/]+|
            (?:shows/[^/]+/)?videos
        )/[^/?#&]+
    )'''
    _TESTS = [{
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'info_dict': {
            'id': '22253814',
            'ext': 'mp4',
            'title': 'Winter is Coming',
            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
            'timestamp': 1338306241,
            'upload_date': '20120529',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'Geo-restricted - This content is not available in your location.',
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
            'id': '600587331957',
            'ext': 'mp4',
            'title': 'Inlawful Entry',
            'description': 'md5:57c12115a2b384d883fe64ca50529e08',
            'timestamp': 1452634428,
            'upload_date': '20160112',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
        'only_matching': True,
    }, {
        'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
        'only_matching': True,
    }, {
        'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/videos/history-of-valentines-day',
        'only_matching': True,
    }, {
        'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Look the video up in the feed by its canonical path."""
        mobj = self._match_valid_url(url)
        domain = mobj.group('domain')
        # The feed stores canonical paths with a leading slash
        canonical_path = '/' + mobj.group('id')
        return self._extract_aetn_info(domain, 'canonical', canonical_path, url)
182
183
class AENetworksListBaseIE(AENetworksBaseIE):
    """Base for playlist-style extractors backed by the GraphQL endpoint.

    Subclasses supply ``_RESOURCE``, ``_ITEMS_KEY``, ``_PLAYLIST_TITLE_KEY``,
    ``_PLAYLIST_DESCRIPTION_KEY``, ``_FIELDS`` and ``_get_doc``.
    """

    def _call_api(self, resource, slug, brand, fields):
        """POST a GraphQL query for ``resource`` and return its data node."""
        graphql_query = '''{
  %s(slug: "%s") {
    %s
  }
}''' % (resource, slug, fields)
        response = self._download_json(
            'https://yoga.appsvcs.aetnd.com/graphql', slug,
            query={'brand': brand},
            data=urlencode_postdata({'query': graphql_query}))
        return response['data'][resource]

    def _real_extract(self, url):
        """Return a playlist whose entries are delegated to AENetworksIE."""
        domain, slug = self._match_valid_url(url).groups()
        brand = self._DOMAIN_MAP[domain][1]
        playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
        base_url = 'http://watch.%s' % domain

        # Items whose document carries no canonical path are skipped
        docs = (self._get_doc(item) for item in (playlist.get(self._ITEMS_KEY) or []))
        entries = [
            self.url_result(
                base_url + doc.get('canonical'), AENetworksIE.ie_key(), doc.get('id'))
            for doc in docs
            if doc.get('canonical')
        ]

        description_key = self._PLAYLIST_DESCRIPTION_KEY
        description = playlist.get(description_key) if description_key else None

        return self.playlist_result(
            entries, playlist.get('id'),
            playlist.get(self._PLAYLIST_TITLE_KEY), description)
218
219
class AENetworksCollectionIE(AENetworksListBaseIE):
    """Playlist extractor for ``list`` and ``collections`` pages."""

    IE_NAME = 'aenetworks:collection'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
        'info_dict': {
            'id': '282',
            'title': 'America The Story of Us',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
        'only_matching': True,
    }, {
        'url': 'https://www.historyvault.com/collections/mysteryquest',
        'only_matching': True,
    }]

    # GraphQL resource name and the keys read from its response
    _RESOURCE = 'list'
    _ITEMS_KEY = 'items'
    _PLAYLIST_TITLE_KEY = 'display_title'
    _PLAYLIST_DESCRIPTION_KEY = None  # no description is requested for lists
    _FIELDS = '''id
    display_title
    items {
      ... on ListVideoItem {
        doc {
          canonical
          id
        }
      }
    }'''

    def _get_doc(self, item):
        """Return the item's nested ``doc`` mapping, or an empty dict."""
        doc = item.get('doc')
        if not doc:
            return {}
        return doc
254
255
class AENetworksShowIE(AENetworksListBaseIE):
    """Playlist extractor for a show page (``shows/<slug>``)."""

    IE_NAME = 'aenetworks:show'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.history.com/shows/ancient-aliens',
        'info_dict': {
            'id': 'SERIES1574',
            'title': 'Ancient Aliens',
            'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
        },
        'playlist_mincount': 150,
    }]

    # GraphQL resource name and the keys read from its response
    _RESOURCE = 'series'
    _ITEMS_KEY = 'episodes'
    _PLAYLIST_TITLE_KEY = 'title'
    _PLAYLIST_DESCRIPTION_KEY = 'description'
    _FIELDS = '''description
    id
    title
    episodes {
      canonical
      id
    }'''

    def _get_doc(self, item):
        """Episodes already are the documents; hand the item back as is."""
        return item
282
283
class HistoryTopicIE(AENetworksBaseIE):
    """Redirects history.com topic video pages to their ``/videos/`` URL."""

    IE_NAME = 'history:topic'
    IE_DESC = 'History.com Topic'
    _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
    _TESTS = [{
        'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
        'info_dict': {
            'id': '40700995724',
            'ext': 'mp4',
            'title': "History of Valentine’s Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
            'timestamp': 1375819729,
            'upload_date': '20130806',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Hand the matching ``/videos/<slug>`` URL over to AENetworksIE."""
        # The captured id is the page slug without its "-video" suffix
        slug = self._match_id(url)
        video_url = 'http://www.history.com/videos/' + slug
        return self.url_result(video_url, AENetworksIE.ie_key())
311
312
class HistoryPlayerIE(AENetworksBaseIE):
    """Extractor for ``/player/<numeric id>`` URLs on history.com and biography.com."""

    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []

    def _real_extract(self, url):
        """Look the video up in the feed by its numeric id."""
        mobj = self._match_valid_url(url)
        return self._extract_aetn_info(
            mobj.group('domain'), 'id', mobj.group('id'), url)
321
322
class BiographyIE(AENetworksBaseIE):
    """Resolves biography.com video pages to their embedded player URL."""

    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': '404 Not Found',
    }]

    def _real_extract(self, url):
        """Find the <phoenix-iframe> player URL and delegate to HistoryPlayerIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The outer group wraps the whole player URL pattern
        iframe_pattern = r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL
        player_url = self._search_regex(iframe_pattern, webpage, 'player URL')
        return self.url_result(player_url, HistoryPlayerIE.ie_key())
348 r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL,
349 webpage, 'player URL')
350 return self.url_result(player_url, HistoryPlayerIE.ie_key())