]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/tvnow.py
[cleanup] Misc
[yt-dlp.git] / yt_dlp / extractor / tvnow.py
1 import re
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import (
6 ExtractorError,
7 get_element_by_id,
8 int_or_none,
9 parse_iso8601,
10 parse_duration,
11 str_or_none,
12 try_get,
13 update_url_query,
14 urljoin,
15 )
16
17
class TVNowBaseIE(InfoExtractor):
    """Common helpers for the extractors that talk to the api.tvnow.de v3 API."""

    # Field names that subclasses join with ',' and send as the ``fields``
    # query parameter of the ``movies/...`` API call.
    _VIDEO_FIELDS = (
        'id', 'title', 'free', 'geoblocked', 'articleLong', 'articleShort',
        'broadcastStartDate', 'isDrm', 'duration', 'season', 'episode',
        'manifest.dashclear', 'manifest.hlsclear', 'manifest.smoothclear',
        'format.title', 'format.defaultImage169Format', 'format.defaultImage169Logo')

    def _call_api(self, path, video_id, query):
        """Download and return the JSON found at ``path`` below the v3 API root."""
        return self._download_json(
            'https://api.tvnow.de/v3/' + path, video_id, query=query)

    def _extract_video(self, info, display_id):
        """Build the result dict for a single video from its API metadata.

        ``info`` is the decoded API response; it must contain ``id`` and
        ``title``.  ``display_id`` is copied into the result unchanged.

        Every distinct manifest listed in ``info['manifest']`` is tried in
        turn (DASH, then smooth streaming, then HLS) until one of them yields
        formats.  If none does, ExtractorError is raised when the video is
        flagged as DRM protected (unless ``allow_unplayable_formats`` is set)
        or as not free, and ``raise_geo_restricted`` is called when it is
        flagged as geoblocked.
        """
        video_id = compat_str(info['id'])
        title = info['title']

        # Initialised up front so that a missing/empty manifest mapping ends
        # in the regular "no formats" handling instead of an unbound local.
        formats = []
        paths = []
        for manifest_url in (info.get('manifest') or {}).values():
            if not manifest_url:
                continue
            manifest_url = update_url_query(manifest_url, {'filter': ''})
            path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
            # Several manifest entries may point at the same stream; probe
            # each underlying path only once.
            if path in paths:
                continue
            paths.append(path)

            def url_repl(proto, suffix, manifest_url=manifest_url):
                # Swap the protocol marker and the manifest file name so the
                # same stream can be requested as DASH, smooth streaming or HLS.
                return re.sub(
                    r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
                        r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
                        '.ism/' + suffix, manifest_url))

            def make_urls(proto, suffix):
                # The rewritten URL plus, when it differs, its '/ngvod/' variant.
                urls = [url_repl(proto, suffix)]
                hd_url = urls[0].replace('/manifest/', '/ngvod/')
                if hd_url != urls[0]:
                    urls.append(hd_url)
                return urls

            # extend() rather than assignment: a second DASH URL must not
            # discard the formats collected from the first one.
            for man_url in make_urls('dash', '.mpd'):
                formats.extend(self._extract_mpd_formats(
                    man_url, video_id, mpd_id='dash', fatal=False))
            for man_url in make_urls('hss', 'Manifest'):
                formats.extend(self._extract_ism_formats(
                    man_url, video_id, ism_id='mss', fatal=False))
            for man_url in make_urls('hls', '.m3u8'):
                formats.extend(self._extract_m3u8_formats(
                    man_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                    fatal=False))
            if formats:
                break
        else:
            # Reached only when no manifest produced any format: report the
            # most specific reason the metadata offers.
            if not self.get_param('allow_unplayable_formats') and info.get('isDrm'):
                raise ExtractorError(
                    'Video %s is DRM protected' % video_id, expected=True)
            if info.get('geoblocked'):
                self.raise_geo_restricted()
            if not info.get('free', True):
                raise ExtractorError(
                    'Video %s is not available for free' % video_id, expected=True)
        self._sort_formats(formats)

        description = info.get('articleLong') or info.get('articleShort')
        timestamp = parse_iso8601(info.get('broadcastStartDate'), ' ')
        duration = parse_duration(info.get('duration'))

        # 'format' may be present but null, so fall back explicitly.
        f = info.get('format') or {}

        thumbnails = [{
            'url': 'https://aistvnow-a.akamaihd.net/tvnow/movie/%s' % video_id,
        }]
        thumbnail = f.get('defaultImage169Format') or f.get('defaultImage169Logo')
        if thumbnail:
            thumbnails.append({
                'url': thumbnail,
            })

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': duration,
            'series': f.get('title'),
            'season_number': int_or_none(info.get('season')),
            'episode_number': int_or_none(info.get('episode')),
            'episode': title,
            'formats': formats,
        }
108
109
class TVNowIE(TVNowBaseIE):
    """Episode extractor for ``/<station>/<show>/<episode>`` style tvnow URLs."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/(?P<station>[^/]+)/
                        (?P<show_id>[^/]+)/
                        (?!(?:list|jahr)(?:/|$))(?P<id>[^/?\#&]+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3/player',
        'info_dict': {
            'id': '331082',
            'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
            'ext': 'mp4',
            'title': 'Der neue Porsche 911 GT 3',
            'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
            'timestamp': 1495994400,
            'upload_date': '20170528',
            'duration': 5283,
            'series': 'GRIP - Das Motormagazin',
            'season_number': 14,
            'episode_number': 405,
            'episode': 'Der neue Porsche 911 GT 3',
        },
    }, {
        # rtl2
        'url': 'https://www.tvnow.de/rtl2/armes-deutschland/episode-0008/player',
        'only_matching': True,
    }, {
        # rtlnitro
        'url': 'https://www.tvnow.de/nitro/alarm-fuer-cobra-11-die-autobahnpolizei/auf-eigene-faust-pilot/player',
        'only_matching': True,
    }, {
        # superrtl
        'url': 'https://www.tvnow.de/superrtl/die-lustigsten-schlamassel-der-welt/u-a-ketchup-effekt/player',
        'only_matching': True,
    }, {
        # ntv
        'url': 'https://www.tvnow.de/ntv/startup-news/goetter-in-weiss/player',
        'only_matching': True,
    }, {
        # vox
        'url': 'https://www.tvnow.de/vox/auto-mobil/neues-vom-automobilmarkt-2017-11-19-17-00-00/player',
        'only_matching': True,
    }, {
        # rtlplus
        'url': 'https://www.tvnow.de/rtlplus/op-ruft-dr-bruckner/die-vernaehte-frau/player',
        'only_matching': True,
    }, {
        'url': 'https://www.tvnow.de/rtl2/grip-das-motormagazin/der-neue-porsche-911-gt-3',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that one of the more specific tvnow extractors accepts."""
        if TVNowNewIE.suitable(url):
            return False
        if TVNowSeasonIE.suitable(url):
            return False
        if TVNowAnnualIE.suitable(url):
            return False
        if TVNowShowIE.suitable(url):
            return False
        return super(TVNowIE, cls).suitable(url)

    def _real_extract(self, url):
        """Fetch the metadata for ``<show_id>/<id>`` and turn it into a result dict."""
        show_id, episode_id = self._match_valid_url(url).group('show_id', 'id')
        display_id = '%s/%s' % (show_id, episode_id)

        fields = ','.join(self._VIDEO_FIELDS)
        info = self._call_api(
            'movies/' + display_id, display_id, query={'fields': fields})

        return self._extract_video(info, display_id)
178
179
class TVNowNewIE(InfoExtractor):
    """Redirects ``/shows/`` and ``/serien/`` episode URLs to TVNowIE."""

    # NOTE(review): '$' inside the character class below is a literal dollar
    # sign; '#' may have been intended - confirm before changing.
    _VALID_URL = r'''(?x)
                    (?P<base_url>https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/
                        (?:shows|serien))/
                        (?P<show>[^/]+)-\d+/
                        [^/]+/
                        episode-\d+-(?P<episode>[^/?$&]+)-(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at the old-style URL of the same episode."""
        mobj = self._match_valid_url(url)
        show, episode, video_id = mobj.group('show', 'episode', 'id')

        # Rewrite new URLs to the old format and use extraction via old API
        # at api.tvnow.de as a loophole for bypassing premium content checks
        legacy_base = re.sub(r'(?:shows|serien)', '_', mobj.group('base_url'))
        legacy_url = '%s/%s/%s' % (legacy_base, show, episode)

        return self.url_result(
            legacy_url, ie=TVNowIE.ie_key(), video_id=video_id)
204
205
class TVNowFilmIE(TVNowBaseIE):
    """Extractor for ``/filme/<title>-<id>`` movie pages."""

    _VALID_URL = r'''(?x)
                    (?P<base_url>https?://
                        (?:www\.)?tvnow\.(?:de|at|ch)/
                        (?:filme))/
                        (?P<title>[^/?$&]+)-(?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
        'info_dict': {
            'id': '1426690',
            'display_id': 'lord-of-war-haendler-des-todes',
            'ext': 'mp4',
            'title': 'Lord of War',
            'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
            'timestamp': 1550010000,
            'upload_date': '20190212',
            'duration': 7016,
        },
    }, {
        'url': 'https://www.tvnow.de/filme/the-machinist-12157',
        'info_dict': {
            'id': '328160',
            'display_id': 'the-machinist',
            'ext': 'mp4',
            'title': 'The Machinist',
            'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
            'timestamp': 1496469720,
            'upload_date': '20170603',
            'duration': 5836,
        },
    }, {
        'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
        'only_matching': True,  # DRM protected
    }]

    def _real_extract(self, url):
        """Read the movie ID from the page's embedded state, then query the API.

        Raises ExtractorError when the page cannot be downloaded, when the
        'now-web-state' element is missing or unparsable, or when no integer
        movie ID is found in it.
        """
        display_id = self._match_valid_url(url).group('title')

        webpage = self._download_webpage(url, display_id, fatal=False)
        if not webpage:
            raise ExtractorError('Cannot download "%s"' % url, expected=True)

        state_text = get_element_by_id('now-web-state', webpage)
        if not state_text:
            raise ExtractorError('Cannot read video data', expected=True)

        # The embedded JSON has its double quotes encoded as '&q;'.
        json_data = self._parse_json(
            state_text, display_id, fatal=False,
            transform_source=lambda x: x.replace('&q;', '"'))
        if not json_data:
            raise ExtractorError('Cannot read video data', expected=True)

        def first_key_containing(fragment):
            # First top-level key that contains ``fragment``, or None.
            for key in json_data.keys():
                if fragment in key:
                    return key
            return None

        player_key = first_key_containing('module/player')
        page_key = first_key_containing('page/filme')

        # Candidate locations of the ID, tried in order; only an int is accepted.
        id_getters = [
            lambda x: x[player_key]['body']['id'],
            lambda x: x[page_key]['body']['modules'][0]['id'],
            lambda x: x[page_key]['body']['modules'][1]['id'],
        ]
        movie_id = try_get(json_data, id_getters, int)
        if not movie_id:
            raise ExtractorError('Cannot extract movie ID', expected=True)

        fields = ','.join(self._VIDEO_FIELDS)
        info = self._call_api(
            'movies/%d' % movie_id, display_id, query={'fields': fields})

        return self._extract_video(info, display_id)
284
285
class TVNowNewBaseIE(InfoExtractor):
    """Base class for the extractors that use the apigw.tvnow.de module API."""

    def _call_api(self, path, video_id, query=None):
        """Download ``path`` below the module API root and return the decoded JSON.

        ``query`` is an optional dict of query parameters; omitting it (or
        passing None) sends an empty dict, as before.

        Raises ExtractorError if the response carries a truthy ``error`` entry.
        """
        # ``None`` instead of a shared mutable ``{}`` default; an empty dict is
        # still what gets handed on when the caller supplies nothing.
        result = self._download_json(
            'https://apigw.tvnow.de/module/' + path, video_id,
            query=query or {})
        error = result.get('error')
        if error:
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        return result
295
296
297 r"""
298 TODO: new apigw.tvnow.de based version of TVNowIE. Replace old TVNowIE with it
299 when api.tvnow.de is shut down. This version can't bypass premium checks though.
300 class TVNowIE(TVNowNewBaseIE):
301 _VALID_URL = r'''(?x)
302 https?://
303 (?:www\.)?tvnow\.(?:de|at|ch)/
304 (?:shows|serien)/[^/]+/
305 (?:[^/]+/)+
306 (?P<display_id>[^/?$&]+)-(?P<id>\d+)
307 '''
308
309 _TESTS = [{
310 # episode with annual navigation
311 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
312 'info_dict': {
313 'id': '331082',
314 'display_id': 'grip-das-motormagazin/der-neue-porsche-911-gt-3',
315 'ext': 'mp4',
316 'title': 'Der neue Porsche 911 GT 3',
317 'description': 'md5:6143220c661f9b0aae73b245e5d898bb',
318 'thumbnail': r're:^https?://.*\.jpg$',
319 'timestamp': 1495994400,
320 'upload_date': '20170528',
321 'duration': 5283,
322 'series': 'GRIP - Das Motormagazin',
323 'season_number': 14,
324 'episode_number': 405,
325 'episode': 'Der neue Porsche 911 GT 3',
326 },
327 }, {
328 # rtl2, episode with season navigation
329 'url': 'https://www.tvnow.de/shows/armes-deutschland-11471/staffel-3/episode-14-bernd-steht-seit-der-trennung-von-seiner-frau-allein-da-526124',
330 'only_matching': True,
331 }, {
332 # rtlnitro
333 'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13/episode-5-auf-eigene-faust-pilot-366822',
334 'only_matching': True,
335 }, {
336 # superrtl
337 'url': 'https://www.tvnow.de/shows/die-lustigsten-schlamassel-der-welt-1221/staffel-2/episode-14-u-a-ketchup-effekt-364120',
338 'only_matching': True,
339 }, {
340 # ntv
341 'url': 'https://www.tvnow.de/shows/startup-news-10674/staffel-2/episode-39-goetter-in-weiss-387630',
342 'only_matching': True,
343 }, {
344 # vox
345 'url': 'https://www.tvnow.de/shows/auto-mobil-174/2017-11/episode-46-neues-vom-automobilmarkt-2017-11-19-17-00-00-380072',
346 'only_matching': True,
347 }, {
348 'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05/episode-405-der-neue-porsche-911-gt-3-331082',
349 'only_matching': True,
350 }]
351
352 def _extract_video(self, info, url, display_id):
353 config = info['config']
354 source = config['source']
355
356 video_id = compat_str(info.get('id') or source['videoId'])
357 title = source['title'].strip()
358
359 paths = []
360 for manifest_url in (info.get('manifest') or {}).values():
361 if not manifest_url:
362 continue
363 manifest_url = update_url_query(manifest_url, {'filter': ''})
364 path = self._search_regex(r'https?://[^/]+/(.+?)\.ism/', manifest_url, 'path')
365 if path in paths:
366 continue
367 paths.append(path)
368
369 def url_repl(proto, suffix):
370 return re.sub(
371 r'(?:hls|dash|hss)([.-])', proto + r'\1', re.sub(
372 r'\.ism/(?:[^.]*\.(?:m3u8|mpd)|[Mm]anifest)',
373 '.ism/' + suffix, manifest_url))
374
375 formats = self._extract_mpd_formats(
376 url_repl('dash', '.mpd'), video_id,
377 mpd_id='dash', fatal=False)
378 formats.extend(self._extract_ism_formats(
379 url_repl('hss', 'Manifest'),
380 video_id, ism_id='mss', fatal=False))
381 formats.extend(self._extract_m3u8_formats(
382 url_repl('hls', '.m3u8'), video_id, 'mp4',
383 'm3u8_native', m3u8_id='hls', fatal=False))
384 if formats:
385 break
386 else:
387 if try_get(info, lambda x: x['rights']['isDrm']):
388 raise ExtractorError(
389 'Video %s is DRM protected' % video_id, expected=True)
390 if try_get(config, lambda x: x['boards']['geoBlocking']['block']):
391 raise self.raise_geo_restricted()
392 if not info.get('free', True):
393 raise ExtractorError(
394 'Video %s is not available for free' % video_id, expected=True)
395 self._sort_formats(formats)
396
397 description = source.get('description')
398 thumbnail = url_or_none(source.get('poster'))
399 timestamp = unified_timestamp(source.get('previewStart'))
400 duration = parse_duration(source.get('length'))
401
402 series = source.get('format')
403 season_number = int_or_none(self._search_regex(
404 r'staffel-(\d+)', url, 'season number', default=None))
405 episode_number = int_or_none(self._search_regex(
406 r'episode-(\d+)', url, 'episode number', default=None))
407
408 return {
409 'id': video_id,
410 'display_id': display_id,
411 'title': title,
412 'description': description,
413 'thumbnail': thumbnail,
414 'timestamp': timestamp,
415 'duration': duration,
416 'series': series,
417 'season_number': season_number,
418 'episode_number': episode_number,
419 'episode': title,
420 'formats': formats,
421 }
422
423 def _real_extract(self, url):
424 display_id, video_id = self._match_valid_url(url).groups()
425 info = self._call_api('player/' + video_id, video_id)
426 return self._extract_video(info, video_id, display_id)
427
428
429 class TVNowFilmIE(TVNowIE): # XXX: Do not subclass from concrete IE
430 _VALID_URL = r'''(?x)
431 (?P<base_url>https?://
432 (?:www\.)?tvnow\.(?:de|at|ch)/
433 (?:filme))/
434 (?P<title>[^/?$&]+)-(?P<id>\d+)
435 '''
436 _TESTS = [{
437 'url': 'https://www.tvnow.de/filme/lord-of-war-haendler-des-todes-7959',
438 'info_dict': {
439 'id': '1426690',
440 'display_id': 'lord-of-war-haendler-des-todes',
441 'ext': 'mp4',
442 'title': 'Lord of War',
443 'description': 'md5:5eda15c0d5b8cb70dac724c8a0ff89a9',
444 'timestamp': 1550010000,
445 'upload_date': '20190212',
446 'duration': 7016,
447 },
448 }, {
449 'url': 'https://www.tvnow.de/filme/the-machinist-12157',
450 'info_dict': {
451 'id': '328160',
452 'display_id': 'the-machinist',
453 'ext': 'mp4',
454 'title': 'The Machinist',
455 'description': 'md5:9a0e363fdd74b3a9e1cdd9e21d0ecc28',
456 'timestamp': 1496469720,
457 'upload_date': '20170603',
458 'duration': 5836,
459 },
460 }, {
461 'url': 'https://www.tvnow.de/filme/horst-schlaemmer-isch-kandidiere-17777',
462 'only_matching': True, # DRM protected
463 }]
464
465 def _real_extract(self, url):
466 mobj = self._match_valid_url(url)
467 display_id = mobj.group('title')
468
469 webpage = self._download_webpage(url, display_id, fatal=False)
470 if not webpage:
471 raise ExtractorError('Cannot download "%s"' % url, expected=True)
472
473 json_text = get_element_by_id('now-web-state', webpage)
474 if not json_text:
475 raise ExtractorError('Cannot read video data', expected=True)
476
477 json_data = self._parse_json(
478 json_text,
479 display_id,
480 transform_source=lambda x: x.replace('&q;', '"'),
481 fatal=False)
482 if not json_data:
483 raise ExtractorError('Cannot read video data', expected=True)
484
485 player_key = next(
486 (key for key in json_data.keys() if 'module/player' in key),
487 None)
488 page_key = next(
489 (key for key in json_data.keys() if 'page/filme' in key),
490 None)
491 movie_id = try_get(
492 json_data,
493 [
494 lambda x: x[player_key]['body']['id'],
495 lambda x: x[page_key]['body']['modules'][0]['id'],
496 lambda x: x[page_key]['body']['modules'][1]['id']],
497 int)
498 if not movie_id:
499 raise ExtractorError('Cannot extract movie ID', expected=True)
500
501 info = self._call_api('player/%d' % movie_id, display_id)
502 return self._extract_video(info, url, display_id)
503 """
504
505
class TVNowListBaseIE(TVNowNewBaseIE):
    """Shared pieces of the show, season and month playlist extractors."""

    # Prefix pattern matching a show page; subclasses append their own suffix.
    _SHOW_VALID_URL = r'''(?x)
                    (?P<base_url>
                        https?://
                            (?:www\.)?tvnow\.(?:de|at|ch)/(?:shows|serien)/
                            [^/?#&]+-(?P<show_id>\d+)
                    )
                    '''

    @classmethod
    def suitable(cls, url):
        """Leave URLs that TVNowNewIE accepts to that extractor."""
        if TVNowNewIE.suitable(url):
            return False
        return super(TVNowListBaseIE, cls).suitable(url)

    def _extract_items(self, url, show_id, list_id, query):
        """Return a playlist of the episodes the API lists for ``show_id``.

        ``query`` is passed to the episode teaser-row endpoint; the playlist
        ID is ``'<show_id>/<list_id>'``.  Items that are not dicts or that
        have no usable URL are skipped.
        """
        response = self._call_api(
            'teaserrow/format/episode/' + show_id, list_id, query=query)

        def iter_entries(items):
            for item in items:
                if not isinstance(item, dict):
                    continue
                item_url = urljoin(url, item.get('url'))
                if not item_url:
                    continue
                yield self.url_result(
                    item_url, ie=TVNowNewIE.ie_key(),
                    video_id=str_or_none(item.get('id') or item.get('videoId')),
                    video_title=item.get('subheadline') or item.get('text'))

        entries = list(iter_entries(response['items']))
        playlist_id = '%s/%s' % (show_id, list_id)
        return self.playlist_result(entries, playlist_id)
539
540
class TVNowSeasonIE(TVNowListBaseIE):
    """Playlist extractor for ``<show>/staffel-<n>`` season pages."""

    _VALID_URL = r'%s/staffel-(?P<id>\d+)' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/serien/alarm-fuer-cobra-11-die-autobahnpolizei-1815/staffel-13',
        'info_dict': {
            'id': '1815/13',
        },
        'playlist_mincount': 22,
    }]

    def _real_extract(self, url):
        """List the episodes of the season named in the URL."""
        show_id, season_id = self._match_valid_url(url).group('show_id', 'id')
        season_query = {'season': season_id}
        return self._extract_items(url, show_id, season_id, season_query)
555
556
class TVNowAnnualIE(TVNowListBaseIE):
    """Playlist extractor for ``<show>/<YYYY>-<MM>`` month pages."""

    _VALID_URL = r'%s/(?P<year>\d{4})-(?P<month>\d{2})' % TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669/2017-05',
        'info_dict': {
            'id': '1669/2017-05',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """List the episodes of the year/month named in the URL."""
        show_id, year, month = self._match_valid_url(url).group(
            'show_id', 'year', 'month')
        list_id = '%s-%s' % (year, month)
        month_query = {
            'year': int(year),
            'month': int(month),
        }
        return self._extract_items(url, show_id, list_id, month_query)
574
575
class TVNowShowIE(TVNowListBaseIE):
    """Playlist extractor for a whole show; yields one entry per season or month."""

    _VALID_URL = TVNowListBaseIE._SHOW_VALID_URL
    _TESTS = [{
        # annual navigationType
        'url': 'https://www.tvnow.de/shows/grip-das-motormagazin-1669',
        'info_dict': {
            'id': '1669',
        },
        'playlist_mincount': 73,
    }, {
        # season navigationType
        'url': 'https://www.tvnow.de/shows/armes-deutschland-11471',
        'info_dict': {
            'id': '11471',
        },
        'playlist_mincount': 3,
    }]

    @classmethod
    def suitable(cls, url):
        """Decline URLs that the episode, season or month extractors accept."""
        if TVNowNewIE.suitable(url):
            return False
        if TVNowSeasonIE.suitable(url):
            return False
        if TVNowAnnualIE.suitable(url):
            return False
        return super(TVNowShowIE, cls).suitable(url)

    def _annual_entries(self, base_url, items):
        # One TVNowAnnualIE entry per (year, month) found in ``items``.
        for item in items:
            if not isinstance(item, dict):
                continue
            year = int_or_none(item.get('year'))
            if year is None:
                continue
            months = item.get('months')
            if not isinstance(months, list):
                continue
            for month_dict in months:
                if not isinstance(month_dict, dict) or not month_dict:
                    continue
                # The month number is the first key of each month dict.
                month_number = int_or_none(next(iter(month_dict)))
                if month_number is None:
                    continue
                yield self.url_result(
                    '%s/%04d-%02d' % (base_url, year, month_number),
                    ie=TVNowAnnualIE.ie_key())

    def _season_entries(self, base_url, items):
        # One TVNowSeasonIE entry per season number found in ``items``.
        for item in items:
            if not isinstance(item, dict):
                continue
            season_number = int_or_none(item.get('season'))
            if season_number is None:
                continue
            yield self.url_result(
                '%s/staffel-%d' % (base_url, season_number),
                ie=TVNowSeasonIE.ie_key())

    def _real_extract(self, url):
        """Return a playlist of season or month sub-playlists for the show.

        Raises ExtractorError when the API reports a ``navigationType`` other
        than 'annual' or 'season'.
        """
        base_url, show_id = self._match_valid_url(url).group('base_url', 'show_id')

        result = self._call_api(
            'teaserrow/format/navigation/' + show_id, show_id)
        items = result['items']

        navigation = result.get('navigationType')
        if navigation == 'annual':
            entries = list(self._annual_entries(base_url, items))
        elif navigation == 'season':
            entries = list(self._season_entries(base_url, items))
        else:
            raise ExtractorError('Unknown navigationType')

        return self.playlist_result(entries, show_id)