]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/zattoo.py
[spotify] Detect iframe embeds (#3430)
[yt-dlp.git] / yt_dlp / extractor / zattoo.py
CommitLineData
4a733545 1import re
67ca1a8e 2from uuid import uuid4
4a733545
AS
3
4from .common import InfoExtractor
67ca1a8e
S
5from ..compat import (
6 compat_HTTPError,
4a733545 7 compat_str,
67ca1a8e
S
8)
9from ..utils import (
4a733545 10 ExtractorError,
67ca1a8e 11 int_or_none,
34921b43 12 join_nonempty,
67ca1a8e 13 try_get,
3052a30d 14 url_or_none,
4a733545
AS
15 urlencode_postdata,
16)
17
18
class ZattooPlatformBaseIE(InfoExtractor):
    """Shared login, metadata and format extraction for Zattoo-platform sites.

    The class reads ``_HOST`` (and, when defined, ``_API_HOST``) from the
    concrete subclass to build every API URL.
    """

    # Filled in by _perform_login(); interpolated into the cached
    # channel-list and program-details API paths.
    _power_guide_hash = None

    def _host_url(self):
        """Return the https base URL, preferring ``_API_HOST`` over ``_HOST``."""
        host = self._API_HOST if hasattr(self, '_API_HOST') else self._HOST
        return f'https://{host}'

    def _real_initialize(self):
        """Request a login when no power guide hash has been stored yet."""
        if not self._power_guide_hash:
            self.raise_login_required('An account is needed to access this media', method='password')

    def _perform_login(self, username, password):
        """Log in and store the session's ``power_guide_hash`` on the instance.

        Raises:
            ExtractorError: flagged ``expected=True`` when the login request
                fails with HTTP 400; any other error is re-raised unchanged.
        """
        host_url = self._host_url()
        try:
            data = self._download_json(
                f'{host_url}/zapi/v2/account/login', None, 'Logging in',
                data=urlencode_postdata({
                    'login': username,
                    'password': password,
                    'remember': 'true',
                }), headers={
                    'Referer': f'{host_url}/login',
                    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                })
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
                raise ExtractorError(
                    'Unable to login: incorrect username and/or password',
                    expected=True)
            raise

        self._power_guide_hash = data['session']['power_guide_hash']

    def _initialize_pre_login(self):
        """Fetch a session token and open an API session with it."""
        host_url = self._host_url()
        session_token = self._download_json(
            f'{host_url}/token.json', None, 'Downloading session token')['session_token']

        # Will setup appropriate cookies
        self._request_webpage(
            f'{host_url}/zapi/v3/session/hello', None,
            'Opening session', data=urlencode_postdata({
                'uuid': compat_str(uuid4()),
                'lang': 'en',
                'app_version': '1.8.2',
                'format': 'json',
                'client_app_token': session_token,
            }))

    def _extract_video_id_from_recording(self, recid):
        """Return the program id (as ``str``) of the recording whose id is *recid*.

        Raises:
            ExtractorError: when the playlist holds no matching recording with
                a program id, or a required key is missing.
        """
        playlist = self._download_json(
            f'{self._host_url()}/zapi/v2/playlist', recid, 'Downloading playlist')
        try:
            return next(
                str(item['program_id']) for item in playlist['recordings']
                if item.get('program_id') and str(item.get('id')) == recid)
        except (StopIteration, KeyError):
            raise ExtractorError('Could not extract video id from recording')

    def _extract_cid(self, video_id, channel_name):
        """Return the ``cid`` of the channel whose ``display_alias`` or ``cid`` equals *channel_name*.

        *video_id* is only passed on to the download helper.

        Raises:
            ExtractorError: when no channel in any group matches.
        """
        channel_groups = self._download_json(
            f'{self._host_url()}/zapi/v2/cached/channels/{self._power_guide_hash}',
            video_id, 'Downloading channel list',
            query={'details': False})['channel_groups']
        # Search the groups directly instead of first copying every channel
        # into one big list.
        for group in channel_groups:
            for chan in group['channels']:
                cid = chan.get('cid')
                if cid and channel_name in (chan.get('display_alias'), cid):
                    return cid
        raise ExtractorError('Could not extract channel id')

    def _extract_cid_and_video_info(self, video_id):
        """Download the program details for *video_id*.

        Returns:
            tuple: ``(cid, info_dict)`` built from the first entry of the
            response's ``programs`` list.
        """
        data = self._download_json(
            f'{self._host_url()}/zapi/v2/cached/program/power_details/{self._power_guide_hash}',
            video_id,
            'Downloading video information',
            query={
                'program_ids': video_id,
                'complete': True,
            })

        p = data['programs'][0]
        cid = p['cid']

        info_dict = {
            'id': video_id,
            # Fall back to the episode title when the program has no title.
            'title': p.get('t') or p['et'],
            'description': p.get('d'),
            'thumbnail': p.get('i_url'),
            'creator': p.get('channel_name'),
            'episode': p.get('et'),
            'episode_number': int_or_none(p.get('e_no')),
            'season_number': int_or_none(p.get('s_no')),
            'release_year': int_or_none(p.get('year')),
            'categories': try_get(p, lambda x: x['c'], list),
            'tags': try_get(p, lambda x: x['g'], list),
        }

        return cid, info_dict

    def _extract_ondemand_info(self, ondemand_id):
        """Download the on-demand movie metadata for *ondemand_id*.

        @returns    (ondemand_token, ondemand_type, info_dict)
        """
        data = self._download_json(
            f'{self._host_url()}/zapi/vod/movies/{ondemand_id}',
            ondemand_id, 'Downloading ondemand information')
        info_dict = {
            'id': ondemand_id,
            'title': data.get('title'),
            'description': data.get('description'),
            'duration': int_or_none(data.get('duration')),
            'release_year': int_or_none(data.get('year')),
            'episode_number': int_or_none(data.get('episode_number')),
            'season_number': int_or_none(data.get('season_number')),
            'categories': try_get(data, lambda x: x['categories'], list),
        }
        return data['terms_catalog'][0]['terms'][0]['token'], data['type'], info_dict

    def _extract_formats(self, cid, video_id, record_id=None, ondemand_id=None, ondemand_termtoken=None, ondemand_type=None, is_live=False):
        """Query the watch endpoint once per stream type and collect the results.

        The endpoint is chosen in this order: live (*is_live*), recording
        (*record_id*), on-demand (*ondemand_id*), otherwise replay of
        *video_id* on channel *cid*.

        Returns:
            tuple: ``(formats, subtitles)``.
        """
        host_url = self._host_url()
        postdata_common = {
            'https_watch_urls': True,
        }

        if is_live:
            postdata_common['timeshift'] = 10800
            url = f'{host_url}/zapi/watch/live/{cid}'
        elif record_id:
            url = f'{host_url}/zapi/watch/recording/{record_id}'
        elif ondemand_id:
            postdata_common.update({
                'teasable_id': ondemand_id,
                'term_token': ondemand_termtoken,
                'teasable_type': ondemand_type,
            })
            url = f'{host_url}/zapi/watch/vod/video'
        else:
            url = f'{host_url}/zapi/v3/watch/replay/{cid}/{video_id}'

        formats = []
        subtitles = {}
        for stream_type in ('dash', 'hls7'):
            postdata = {**postdata_common, 'stream_type': stream_type}

            data = self._download_json(
                url, video_id, f'Downloading {stream_type.upper()} formats',
                data=urlencode_postdata(postdata), fatal=False)
            if not data:
                continue

            watch_urls = try_get(
                data, lambda x: x['stream']['watch_urls'], list)
            if not watch_urls:
                continue

            for watch in watch_urls:
                if not isinstance(watch, dict):
                    continue
                watch_url = url_or_none(watch.get('url'))
                if not watch_url:
                    continue
                audio_channel = watch.get('audio_channel')
                # Streams on audio channel 'A' get a quality of 1, all others None.
                preference = 1 if audio_channel == 'A' else None
                format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel)
                if stream_type.startswith('dash'):
                    this_formats, subs = self._extract_mpd_formats_and_subtitles(
                        watch_url, video_id, mpd_id=format_id, fatal=False)
                    self._merge_subtitles(subs, target=subtitles)
                elif stream_type.startswith('hls'):
                    this_formats, subs = self._extract_m3u8_formats_and_subtitles(
                        watch_url, video_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        fatal=False)
                    self._merge_subtitles(subs, target=subtitles)
                elif stream_type == 'hds':
                    this_formats = self._extract_f4m_formats(
                        watch_url, video_id, f4m_id=format_id, fatal=False)
                elif stream_type == 'smooth_playready':
                    this_formats = self._extract_ism_formats(
                        watch_url, video_id, ism_id=format_id, fatal=False)
                else:
                    # Unknown stream type: skip it explicitly. An `assert`
                    # would vanish under `python -O` and let a stale (or
                    # unbound) `this_formats` be processed below.
                    continue
                for this_format in this_formats:
                    this_format['quality'] = preference
                formats.extend(this_formats)
        self._sort_formats(formats)
        return formats, subtitles

    def _extract_video(self, video_id, record_id=None):
        """Return the info dict (metadata, formats, subtitles) for program *video_id*."""
        cid, info_dict = self._extract_cid_and_video_info(video_id)
        info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
        return info_dict

    def _extract_live(self, channel_name):
        """Return the info dict for the live stream of *channel_name*."""
        cid = self._extract_cid(channel_name, channel_name)
        formats, subtitles = self._extract_formats(cid, cid, is_live=True)
        return {
            'id': channel_name,
            'title': channel_name,
            'is_live': True,
            # Was 'format'; every other extraction method here stores the
            # format list under 'formats'.
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_record(self, record_id):
        """Return the info dict for the recording *record_id*."""
        video_id = self._extract_video_id_from_recording(record_id)
        cid, info_dict = self._extract_cid_and_video_info(video_id)
        info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
        return info_dict

    def _extract_ondemand(self, ondemand_id):
        """Return the info dict for the on-demand movie *ondemand_id*."""
        ondemand_termtoken, ondemand_type, info_dict = self._extract_ondemand_info(ondemand_id)
        info_dict['formats'], info_dict['subtitles'] = self._extract_formats(
            None, ondemand_id, ondemand_id=ondemand_id,
            ondemand_termtoken=ondemand_termtoken, ondemand_type=ondemand_type)
        return info_dict
67ca1a8e 239
4a733545 240
9b8b7a7b
AS
def _make_valid_url(host):
    """Build the ``/watch/<channel>/<id>[slug][/<recid>]`` URL regex for *host*.

    *host* is escaped before being embedded. The pattern exposes the named
    groups ``id`` (leading digits of the second path segment) and the
    optional ``recid`` (digits of a following segment).
    """
    # The slug after the id is `[^/]*`, not `[^/]+`: a mandatory character
    # there made the greedy digit group give back its last digit whenever the
    # segment was all digits, truncating the id.
    return rf'https?://(?:www\.)?{re.escape(host)}/watch/[^/]+?/(?P<id>[0-9]+)[^/]*(?:/(?P<recid>[0-9]+))?'
4a733545
AS
243
244
f6d7f7b4
S
class ZattooBaseIE(ZattooPlatformBaseIE):
    """Common base of the zattoo.com extractors.

    Concrete subclasses set ``_TYPE``; ``_real_extract`` dispatches to the
    ``_extract_<_TYPE>`` method with the id captured from the URL.
    """

    _NETRC_MACHINE = 'zattoo'
    _HOST = 'zattoo.com'

    @staticmethod
    def _create_valid_url(match, qs, base_re=None):
        """Return a verbose regex for zattoo.com URLs carrying an id.

        The id (sub-pattern *match*) is captured as ``vid2`` from the query
        parameter *qs*, or - when *base_re* is given - as ``vid1`` from the
        path ``<base_re>/<id>``. Without *base_re* the ``vid1`` group is
        defined but empty.
        """
        if base_re:
            path_part = fr'|{base_re}/(?P<vid1>{match})'
        else:
            path_part = '(?P<vid1>)'
        return rf'''(?x)https?://(?:www\.)?zattoo\.com/(?:
            [^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
            {path_part}
        )'''

    def _real_extract(self, url):
        """Extract *url* using the handler selected by ``_TYPE``."""
        mobj = self._match_valid_url(url)
        vid1, vid2 = mobj.group('vid1', 'vid2')
        handler = getattr(self, f'_extract_{self._TYPE}')
        return handler(vid1 or vid2)
f6d7f7b4
S
260
261
class ZattooIE(ZattooBaseIE):
    """zattoo.com programs, addressed by path or by the ``program`` query parameter."""

    _TYPE = 'video'
    _VALID_URL = ZattooBaseIE._create_valid_url(
        match=r'\d+', qs='program', base_re='(?:program|watch)/[^/]+')
    _TESTS = [
        {
            'url': 'https://zattoo.com/program/zdf/250170418',
            'info_dict': {
                'id': '250170418',
                'ext': 'mp4',
                'title': 'Markus Lanz',
                'description': 'md5:e41cb1257de008ca62a73bb876ffa7fc',
                'thumbnail': 're:http://images.zattic.com/cms/.+/format_480x360.jpg',
                'creator': 'ZDF HD',
                'release_year': 2022,
                'episode': 'Folge 1655',
                'categories': 'count:1',
                'tags': 'count:2',
            },
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://zattoo.com/program/daserste/210177916',
            'only_matching': True,
        },
        {
            'url': 'https://zattoo.com/guide/german?channel=srf1&program=169860555',
            'only_matching': True,
        },
    ]
287
4a733545
AS
288
class ZattooLiveIE(ZattooBaseIE):
    """zattoo.com live channels, addressed by ``/live/<channel>`` or the ``channel`` query parameter."""

    _TYPE = 'live'
    _VALID_URL = ZattooBaseIE._create_valid_url(
        match=r'[^/?&#]+', qs='channel', base_re='live')
    _TESTS = [
        {
            'url': 'https://zattoo.com/channels/german?channel=srf_zwei',
            'only_matching': True,
        },
        {
            'url': 'https://zattoo.com/live/srf1',
            'only_matching': True,
        },
    ]

    @classmethod
    def suitable(cls, url):
        """Decline any URL that ZattooIE accepts; otherwise defer to the parent check."""
        if ZattooIE.suitable(url):
            return False
        return super().suitable(url)
67ca1a8e 303
9b8b7a7b
AS
304
class ZattooMoviesIE(ZattooBaseIE):
    """zattoo.com on-demand movies, addressed by ``/vod/movies/<id>`` or the ``movie_id`` query parameter."""

    _TYPE = 'ondemand'
    _VALID_URL = ZattooBaseIE._create_valid_url(
        match=r'\w+', qs='movie_id', base_re='vod/movies')
    _TESTS = [
        {
            'url': 'https://zattoo.com/vod/movies/7521',
            'only_matching': True,
        },
        {
            'url': 'https://zattoo.com/ondemand?movie_id=7521&term_token=9f00f43183269484edde',
            'only_matching': True,
        },
    ]
f6d7f7b4
S
315
316
9b8b7a7b
AS
class ZattooRecordingsIE(ZattooBaseIE):
    """zattoo.com recordings, addressed by the ``recording`` query parameter only."""

    _TYPE = 'record'
    _VALID_URL = ZattooBaseIE._create_valid_url(match=r'\d+', qs='recording')
    _TESTS = [
        {
            'url': 'https://zattoo.com/recordings?recording=193615508',
            'only_matching': True,
        },
        {
            'url': 'https://zattoo.com/tc/ptc_recordings_all_recordings?recording=193615420',
            'only_matching': True,
        },
    ]
327
328
class NetPlusIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for netplus.tv."""

    _NETRC_MACHINE = 'netplus'
    _HOST = 'netplus.tv'
    # API requests go to the www. host.
    _API_HOST = f'www.{_HOST}'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://www.netplus.tv/watch/abc/123-abc', 'only_matching': True},
    ]
339
340
class MNetTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for tvplus.m-net.de."""

    _NETRC_MACHINE = 'mnettv'
    _HOST = 'tvplus.m-net.de'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://tvplus.m-net.de/watch/abc/123-abc', 'only_matching': True},
    ]
350
351
class WalyTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for player.waly.tv."""

    _NETRC_MACHINE = 'walytv'
    _HOST = 'player.waly.tv'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://player.waly.tv/watch/abc/123-abc', 'only_matching': True},
    ]
361
362
class BBVTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for bbv-tv.net."""

    _NETRC_MACHINE = 'bbvtv'
    _HOST = 'bbv-tv.net'
    # API requests go to the www. host.
    _API_HOST = f'www.{_HOST}'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://www.bbv-tv.net/watch/abc/123-abc', 'only_matching': True},
    ]
373
374
class VTXTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for vtxtv.ch."""

    _NETRC_MACHINE = 'vtxtv'
    _HOST = 'vtxtv.ch'
    # API requests go to the www. host.
    _API_HOST = f'www.{_HOST}'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://www.vtxtv.ch/watch/abc/123-abc', 'only_matching': True},
    ]
385
386
class GlattvisionTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for iptv.glattvision.ch."""

    _NETRC_MACHINE = 'glattvisiontv'
    _HOST = 'iptv.glattvision.ch'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://iptv.glattvision.ch/watch/abc/123-abc', 'only_matching': True},
    ]
396
397
class SAKTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for saktv.ch."""

    _NETRC_MACHINE = 'saktv'
    _HOST = 'saktv.ch'
    # API requests go to the www. host.
    _API_HOST = f'www.{_HOST}'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://www.saktv.ch/watch/abc/123-abc', 'only_matching': True},
    ]
408
409
class EWETVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for tvonline.ewe.de."""

    _NETRC_MACHINE = 'ewetv'
    _HOST = 'tvonline.ewe.de'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://tvonline.ewe.de/watch/abc/123-abc', 'only_matching': True},
    ]
419
420
class QuantumTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for quantum-tv.com."""

    _NETRC_MACHINE = 'quantumtv'
    _HOST = 'quantum-tv.com'
    # API requests go to the www. host.
    _API_HOST = f'www.{_HOST}'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://www.quantum-tv.com/watch/abc/123-abc', 'only_matching': True},
    ]
431
432
class OsnatelTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for tvonline.osnatel.de."""

    _NETRC_MACHINE = 'osnateltv'
    _HOST = 'tvonline.osnatel.de'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://tvonline.osnatel.de/watch/abc/123-abc', 'only_matching': True},
    ]
442
443
class EinsUndEinsTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for 1und1.tv."""

    _NETRC_MACHINE = '1und1tv'
    _HOST = '1und1.tv'
    # API requests go to the www. host.
    _API_HOST = f'www.{_HOST}'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://www.1und1.tv/watch/abc/123-abc', 'only_matching': True},
    ]
a81daba2
AS
454
455
class SaltTVIE(ZattooPlatformBaseIE):
    """Zattoo-platform extractor configuration for tv.salt.ch."""

    _NETRC_MACHINE = 'salttv'
    _HOST = 'tv.salt.ch'
    _VALID_URL = _make_valid_url(_HOST)

    _TESTS = [
        {'url': 'https://tv.salt.ch/watch/abc/123-abc', 'only_matching': True},
    ]