]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/aenetworks.py
[extractor] Common function `_match_valid_url`
[yt-dlp.git] / yt_dlp / extractor / aenetworks.py
CommitLineData
10385322 1# coding: utf-8
b9c7a973
S
2from __future__ import unicode_literals
3
d8873d4d 4
05c7feec 5from .theplatform import ThePlatformIE
d8873d4d 6from ..utils import (
4f1e02ad 7 ExtractorError,
29f7c58a 8 GeoRestrictedError,
4f1e02ad 9 int_or_none,
d8873d4d 10 update_url_query,
29f7c58a 11 urlencode_postdata,
42362fdb
RA
12)
13
14
class AENetworksBaseIE(ThePlatformIE):
    """Common helpers shared by the A+E Networks extractors.

    Holds the shared URL prefix regex, the key/secret passed to
    ``_sign_url``, the per-domain ``(requestor_id, brand)`` lookup table and
    the two extraction routines used by the concrete extractors.
    """

    _BASE_URL_REGEX = r'''(?x)https?://
        (?:(?:www|play|watch)\.)?
        (?P<domain>
            (?:history(?:vault)?|aetv|mylifetime|lifetimemovieclub)\.com|
            fyi\.tv
        )/'''
    _THEPLATFORM_KEY = '43jXaGRQud'
    _THEPLATFORM_SECRET = 'S10BPXHMlb'
    # domain -> (requestor_id, brand). The requestor id is handed to the MVPD
    # auth helpers; the brand selects the feed / API path for that site.
    _DOMAIN_MAP = {
        'history.com': ('HISTORY', 'history'),
        'aetv.com': ('AETV', 'aetv'),
        'mylifetime.com': ('LIFETIME', 'lifetime'),
        'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
        'fyi.tv': ('FYI', 'fyi'),
        'historyvault.com': (None, 'historyvault'),
        'biography.com': (None, 'biography'),
    }

    def _extract_aen_smil(self, smil_url, video_id, auth=None):
        """Gather formats and subtitles from several SMIL variants of a URL.

        Each entry of the local query list is merged with the common query
        (``mbr`` plus the optional ``auth`` token), the resulting URL is
        signed and the SMIL document is fetched.

        Returns a dict with ``id``, ``formats`` and ``subtitles``.

        Raises GeoRestrictedError as soon as one is seen. Any other
        ExtractorError is remembered and only re-raised (the last one) when
        no variant produced a single format.
        """
        query = {'mbr': 'true'}
        if auth:
            query['auth'] = auth
        TP_SMIL_QUERY = [{
            'assetTypes': 'high_video_ak',
            'switch': 'hls_high_ak'
        }, {
            'assetTypes': 'high_video_s3'
        }, {
            'assetTypes': 'high_video_s3',
            'switch': 'hls_high_fastly',
        }]
        formats = []
        subtitles = {}
        last_e = None
        for q in TP_SMIL_QUERY:
            q.update(query)
            m_url = update_url_query(smil_url, q)
            m_url = self._sign_url(m_url, self._THEPLATFORM_KEY, self._THEPLATFORM_SECRET)
            try:
                tp_formats, tp_subtitles = self._extract_theplatform_smil(
                    m_url, video_id, 'Downloading %s SMIL data' % (q.get('switch') or q['assetTypes']))
            except ExtractorError as e:
                # Geo restriction applies to every variant; fail right away.
                if isinstance(e, GeoRestrictedError):
                    raise
                last_e = e
                continue
            formats.extend(tp_formats)
            subtitles = self._merge_subtitles(subtitles, tp_subtitles)
        if last_e and not formats:
            raise last_e
        self._sort_formats(formats)
        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_aetn_info(self, domain, filter_key, filter_value, url):
        """Look up a video in the brand feed and build its info dict.

        domain -- key into ``_DOMAIN_MAP``
        filter_key, filter_value -- sent to the feed as
            ``filter[<filter_key>]=<filter_value>``
        url -- original page URL, forwarded to ``_extract_mvpd_auth``

        Raises ExtractorError when the feed returns no matching video.
        """
        requestor_id, brand = self._DOMAIN_MAP[domain]
        response = self._download_json(
            'https://feeds.video.aetnd.com/api/v2/%s/videos' % brand,
            filter_value, query={'filter[%s]' % filter_key: filter_value})
        # An empty or missing result list used to surface as a bare
        # IndexError/KeyError; report it as a proper extractor error instead.
        results = response.get('results') if isinstance(response, dict) else None
        if not results:
            raise ExtractorError(
                'Unable to find a video with %s "%s" in the %s feed'
                % (filter_key, filter_value, brand), expected=True)
        result = results[0]
        title = result['title']
        video_id = result['id']
        media_url = result['publicUrl']
        theplatform_metadata = self._download_theplatform_metadata(self._search_regex(
            r'https?://link\.theplatform\.com/s/([^?]+)', media_url, 'theplatform_path'), video_id)
        info = self._parse_theplatform_metadata(theplatform_metadata)
        auth = None
        # Content flagged as behind the wall needs an MVPD auth token.
        if theplatform_metadata.get('AETN$isBehindWall'):
            resource = self._get_mvpd_resource(
                requestor_id, theplatform_metadata['title'],
                theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
                theplatform_metadata['ratings'][0]['rating'])
            auth = self._extract_mvpd_auth(
                url, video_id, requestor_id, resource)
        info.update(self._extract_aen_smil(media_url, video_id, auth))
        # Feed values take precedence over whatever the metadata supplied.
        info.update({
            'title': title,
            'series': result.get('seriesName'),
            'season_number': int_or_none(result.get('tvSeasonNumber')),
            'episode_number': int_or_none(result.get('tvSeasonEpisodeNumber')),
        })
        return info
100
b9c7a973 101
class AENetworksIE(AENetworksBaseIE):
    """Extractor for single episode, movie, special and video pages."""

    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault'
    # The ``id`` group captures the canonical path (without leading slash).
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'''(?P<id>
        shows/[^/]+/season-\d+/episode-\d+|
        (?:
            (?:movie|special)s/[^/]+|
            (?:shows/[^/]+/)?videos
        )/[^/?#&]+
    )'''
    _TESTS = [{
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'info_dict': {
            'id': '22253814',
            'ext': 'mp4',
            'title': 'Winter is Coming',
            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
            'timestamp': 1338306241,
            'upload_date': '20120529',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'skip': 'This video is only available for users of participating TV providers.',
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/season-9/episode-1',
        'info_dict': {
            'id': '600587331957',
            'ext': 'mp4',
            'title': 'Inlawful Entry',
            'description': 'md5:57c12115a2b384d883fe64ca50529e08',
            'timestamp': 1452634428,
            'upload_date': '20160112',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/shows/project-runway-junior/season-1/episode-6',
        'only_matching': True,
    }, {
        'url': 'http://www.mylifetime.com/movies/center-stage-on-pointe/full-movie',
        'only_matching': True,
    }, {
        'url': 'https://watch.lifetimemovieclub.com/movies/10-year-reunion/full-movie',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/specials/sniper-into-the-kill-zone/full-special',
        'only_matching': True,
    }, {
        'url': 'https://www.aetv.com/specials/hunting-jonbenets-killer-the-untold-story/preview-hunting-jonbenets-killer-the-untold-story',
        'only_matching': True,
    }, {
        'url': 'http://www.history.com/videos/history-of-valentines-day',
        'only_matching': True,
    }, {
        'url': 'https://play.aetv.com/shows/duck-dynasty/videos/best-of-duck-dynasty-getting-quack-in-shape',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the page by its canonical path via the brand feed."""
        mobj = self._match_valid_url(url)
        domain, canonical = mobj.group('domain', 'id')
        # The feed expects the canonical path with a leading slash.
        canonical_path = '/' + canonical
        return self._extract_aetn_info(domain, 'canonical', canonical_path, url)
174
175
class AENetworksListBaseIE(AENetworksBaseIE):
    """Base class for playlist extractors backed by the GraphQL endpoint.

    Reads ``_RESOURCE``, ``_FIELDS``, ``_ITEMS_KEY``, ``_PLAYLIST_TITLE_KEY``
    and ``_PLAYLIST_DESCRIPTION_KEY`` from ``self`` and calls
    ``self._get_doc(item)``; none of these are defined in this class.
    """

    def _call_api(self, resource, slug, brand, fields):
        """POST a GraphQL query for ``resource`` and return its data node."""
        graphql_query = '''{
  %s(slug: "%s") {
    %s
  }
}''' % (resource, slug, fields)
        response = self._download_json(
            'https://yoga.appsvcs.aetnd.com/graphql', slug,
            query={'brand': brand},
            data=urlencode_postdata({'query': graphql_query}))
        return response['data'][resource]

    def _real_extract(self, url):
        """Build a playlist whose entries are delegated to AENetworksIE."""
        domain, slug = self._match_valid_url(url).groups()
        brand = self._DOMAIN_MAP[domain][1]
        playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
        base_url = 'http://watch.%s' % domain

        docs = (
            self._get_doc(item)
            for item in playlist.get(self._ITEMS_KEY) or [])
        # Items without a canonical path cannot be turned into a URL; skip them.
        entries = [
            self.url_result(
                base_url + doc['canonical'], AENetworksIE.ie_key(), doc.get('id'))
            for doc in docs if doc.get('canonical')]

        description_key = self._PLAYLIST_DESCRIPTION_KEY
        description = playlist.get(description_key) if description_key else None

        return self.playlist_result(
            entries, playlist.get('id'),
            playlist.get(self._PLAYLIST_TITLE_KEY), description)
210
211
class AENetworksCollectionIE(AENetworksListBaseIE):
    """Playlist extractor for ``list`` / ``collections`` pages."""

    IE_NAME = 'aenetworks:collection'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'(?:[^/]+/)*(?:list|collections)/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'https://watch.historyvault.com/list/america-the-story-of-us',
        'info_dict': {
            'id': '282',
            'title': 'America The Story of Us',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'https://watch.historyvault.com/shows/america-the-story-of-us-2/season-1/list/america-the-story-of-us',
        'only_matching': True,
    }, {
        'url': 'https://www.historyvault.com/collections/mysteryquest',
        'only_matching': True,
    }]

    # Settings read by the list base class.
    _RESOURCE = 'list'
    _ITEMS_KEY = 'items'
    _PLAYLIST_TITLE_KEY = 'display_title'
    _PLAYLIST_DESCRIPTION_KEY = None  # no description is requested for lists
    _FIELDS = '''id
    display_title
    items {
      ... on ListVideoItem {
        doc {
          canonical
          id
        }
      }
    }'''

    def _get_doc(self, item):
        """Return the item's nested ``doc`` mapping, or an empty dict."""
        doc = item.get('doc')
        if not doc:
            return {}
        return doc
246
247
class AENetworksShowIE(AENetworksListBaseIE):
    """Playlist extractor for a show page (``shows/<slug>``)."""

    IE_NAME = 'aenetworks:show'
    _VALID_URL = AENetworksBaseIE._BASE_URL_REGEX + r'shows/(?P<id>[^/?#&]+)/?(?:[?#&]|$)'
    _TESTS = [{
        'url': 'http://www.history.com/shows/ancient-aliens',
        'info_dict': {
            'id': 'SERIES1574',
            'title': 'Ancient Aliens',
            'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
        },
        'playlist_mincount': 150,
    }]

    # Settings read by the list base class.
    _RESOURCE = 'series'
    _ITEMS_KEY = 'episodes'
    _PLAYLIST_TITLE_KEY = 'title'
    _PLAYLIST_DESCRIPTION_KEY = 'description'
    _FIELDS = '''description
    id
    title
    episodes {
      canonical
      id
    }'''

    def _get_doc(self, item):
        """Episodes already carry ``canonical`` and ``id``; use them as-is."""
        return item
b9c7a973 274
b9c7a973 275
42362fdb
RA
class HistoryTopicIE(AENetworksBaseIE):
    """Extractor for History.com topic video pages.

    Does no extraction itself: the matched slug is rewritten to a
    ``/videos/<slug>`` URL that is handed to AENetworksIE.
    """

    IE_NAME = 'history:topic'
    IE_DESC = 'History.com Topic'
    _VALID_URL = r'https?://(?:www\.)?history\.com/topics/[^/]+/(?P<id>[\w+-]+?)-video'
    _TESTS = [{
        'url': 'https://www.history.com/topics/valentines-day/history-of-valentines-day-video',
        'info_dict': {
            'id': '40700995724',
            'ext': 'mp4',
            'title': "History of Valentine’s Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
            'timestamp': 1375819729,
            'upload_date': '20130806',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Redirect to the equivalent ``/videos/`` page."""
        slug = self._match_id(url)
        video_page = 'http://www.history.com/videos/' + slug
        return self.url_result(video_page, AENetworksIE.ie_key())
303
304
class HistoryPlayerIE(AENetworksBaseIE):
    """Extractor for ``/player/<numeric id>`` URLs on history.com / biography.com."""

    IE_NAME = 'history:player'
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:history|biography)\.com)/player/(?P<id>\d+)'
    _TESTS = []

    def _real_extract(self, url):
        """Look the video up in the brand feed by its numeric id."""
        mobj = self._match_valid_url(url)
        domain, video_id = mobj.group('domain', 'id')
        return self._extract_aetn_info(domain, 'id', video_id, url)
313
314
class BiographyIE(AENetworksBaseIE):
    """Extractor for biography.com video pages.

    Finds the embedded player URL in the page and delegates to
    HistoryPlayerIE.
    """

    _VALID_URL = r'https?://(?:www\.)?biography\.com/video/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.biography.com/video/vincent-van-gogh-full-episode-2075049808',
        'info_dict': {
            'id': '30322987',
            'ext': 'mp4',
            'title': 'Vincent Van Gogh - Full Episode',
            'description': 'A full biography about the most influential 20th century painter, Vincent Van Gogh.',
            'timestamp': 1311970571,
            'upload_date': '20110729',
            'uploader': 'AENE-NEW',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
    }]

    def _real_extract(self, url):
        """Download the page and hand its player iframe URL to HistoryPlayerIE."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # The outer group wraps the whole player URL pattern so that the full
        # URL, not one of its inner groups, is what gets captured first.
        iframe_pattern = r'<phoenix-iframe[^>]+src="(%s)' % HistoryPlayerIE._VALID_URL
        player_url = self._search_regex(iframe_pattern, webpage, 'player URL')
        return self.url_result(player_url, HistoryPlayerIE.ie_key())