yt_dlp/extractor/zattoo.py

   1 import re
   2 from uuid import uuid4
   3
   4 from .common import InfoExtractor
   5 from ..compat import (
   6     compat_HTTPError,
   7     compat_str,
   8 )
   9 from ..utils import (
  10     ExtractorError,
  11     int_or_none,
  12     join_nonempty,
  13     try_get,
  14     url_or_none,
  15     urlencode_postdata,
  16 )
  17
  18
  19 class ZattooPlatformBaseIE(InfoExtractor):
  20     _power_guide_hash = None
  21
  22     def _host_url(self):
  23         return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
  24
  25     def _real_initialize(self):
  26         if not self._power_guide_hash:
  27             self.raise_login_required('An account is needed to access this media', method='password')
  28
  29     def _perform_login(self, username, password):
  30         try:
  31             data = self._download_json(
  32                 '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
  33                 data=urlencode_postdata({
  34                     'login': username,
  35                     'password': password,
  36                     'remember': 'true',
  37                 }), headers={
  38                     'Referer': '%s/login' % self._host_url(),
  39                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  40                 })
  41         except ExtractorError as e:
  42             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  43                 raise ExtractorError(
  44                     'Unable to login: incorrect username and/or password',
  45                     expected=True)
  46             raise
  47
  48         self._power_guide_hash = data['session']['power_guide_hash']
  49
  50     def _initialize_pre_login(self):
  51         session_token = self._download_json(
  52             f'{self._host_url()}/token.json', None, 'Downloading session token')['session_token']
  53
  54         # Will setup appropriate cookies
  55         self._request_webpage(
  56             '%s/zapi/v3/session/hello' % self._host_url(), None,
  57             'Opening session', data=urlencode_postdata({
  58                 'uuid': compat_str(uuid4()),
  59                 'lang': 'en',
  60                 'app_version': '1.8.2',
  61                 'format': 'json',
  62                 'client_app_token': session_token,
  63             }))
  64
  65     def _extract_video_id_from_recording(self, recid):
  66         playlist = self._download_json(
  67             f'{self._host_url()}/zapi/v2/playlist', recid, 'Downloading playlist')
  68         try:
  69             return next(
  70                 str(item['program_id']) for item in playlist['recordings']
  71                 if item.get('program_id') and str(item.get('id')) == recid)
  72         except (StopIteration, KeyError):
  73             raise ExtractorError('Could not extract video id from recording')
  74
  75     def _extract_cid(self, video_id, channel_name):
  76         channel_groups = self._download_json(
  77             '%s/zapi/v2/cached/channels/%s' % (self._host_url(),
  78                                                self._power_guide_hash),
  79             video_id, 'Downloading channel list',
  80             query={'details': False})['channel_groups']
  81         channel_list = []
  82         for chgrp in channel_groups:
  83             channel_list.extend(chgrp['channels'])
  84         try:
  85             return next(
  86                 chan['cid'] for chan in channel_list
  87                 if chan.get('cid') and (
  88                     chan.get('display_alias') == channel_name
  89                     or chan.get('cid') == channel_name))
  90         except StopIteration:
  91             raise ExtractorError('Could not extract channel id')
  92
  93     def _extract_cid_and_video_info(self, video_id):
  94         data = self._download_json(
  95             '%s/zapi/v2/cached/program/power_details/%s' % (
  96                 self._host_url(), self._power_guide_hash),
  97             video_id,
  98             'Downloading video information',
  99             query={
 100                 'program_ids': video_id,
 101                 'complete': True,
 102             })
 103
 104         p = data['programs'][0]
 105         cid = p['cid']
 106
 107         info_dict = {
 108             'id': video_id,
 109             'title': p.get('t') or p['et'],
 110             'description': p.get('d'),
 111             'thumbnail': p.get('i_url'),
 112             'creator': p.get('channel_name'),
 113             'episode': p.get('et'),
 114             'episode_number': int_or_none(p.get('e_no')),
 115             'season_number': int_or_none(p.get('s_no')),
 116             'release_year': int_or_none(p.get('year')),
 117             'categories': try_get(p, lambda x: x['c'], list),
 118             'tags': try_get(p, lambda x: x['g'], list)
 119         }
 120
 121         return cid, info_dict
 122
 123     def _extract_ondemand_info(self, ondemand_id):
 124         """
 125         @returns    (ondemand_token, ondemand_type, info_dict)
 126         """
 127         data = self._download_json(
 128             '%s/zapi/vod/movies/%s' % (self._host_url(), ondemand_id),
 129             ondemand_id, 'Downloading ondemand information')
 130         info_dict = {
 131             'id': ondemand_id,
 132             'title': data.get('title'),
 133             'description': data.get('description'),
 134             'duration': int_or_none(data.get('duration')),
 135             'release_year': int_or_none(data.get('year')),
 136             'episode_number': int_or_none(data.get('episode_number')),
 137             'season_number': int_or_none(data.get('season_number')),
 138             'categories': try_get(data, lambda x: x['categories'], list),
 139         }
 140         return data['terms_catalog'][0]['terms'][0]['token'], data['type'], info_dict
 141
 142     def _extract_formats(self, cid, video_id, record_id=None, ondemand_id=None, ondemand_termtoken=None, ondemand_type=None, is_live=False):
 143         postdata_common = {
 144             'https_watch_urls': True,
 145         }
 146
 147         if is_live:
 148             postdata_common.update({'timeshift': 10800})
 149             url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
 150         elif record_id:
 151             url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
 152         elif ondemand_id:
 153             postdata_common.update({
 154                 'teasable_id': ondemand_id,
 155                 'term_token': ondemand_termtoken,
 156                 'teasable_type': ondemand_type
 157             })
 158             url = '%s/zapi/watch/vod/video' % self._host_url()
 159         else:
 160             url = '%s/zapi/v3/watch/replay/%s/%s' % (self._host_url(), cid, video_id)
 161         formats = []
 162         subtitles = {}
 163         for stream_type in ('dash', 'hls7'):
 164             postdata = postdata_common.copy()
 165             postdata['stream_type'] = stream_type
 166
 167             data = self._download_json(
 168                 url, video_id, 'Downloading %s formats' % stream_type.upper(),
 169                 data=urlencode_postdata(postdata), fatal=False)
 170             if not data:
 171                 continue
 172
 173             watch_urls = try_get(
 174                 data, lambda x: x['stream']['watch_urls'], list)
 175             if not watch_urls:
 176                 continue
 177
 178             for watch in watch_urls:
 179                 if not isinstance(watch, dict):
 180                     continue
 181                 watch_url = url_or_none(watch.get('url'))
 182                 if not watch_url:
 183                     continue
 184                 audio_channel = watch.get('audio_channel')
 185                 preference = 1 if audio_channel == 'A' else None
 186                 format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel)
 187                 if stream_type.startswith('dash'):
 188                     this_formats, subs = self._extract_mpd_formats_and_subtitles(
 189                         watch_url, video_id, mpd_id=format_id, fatal=False)
 190                     self._merge_subtitles(subs, target=subtitles)
 191                 elif stream_type.startswith('hls'):
 192                     this_formats, subs = self._extract_m3u8_formats_and_subtitles(
 193                         watch_url, video_id, 'mp4',
 194                         entry_protocol='m3u8_native', m3u8_id=format_id,
 195                         fatal=False)
 196                     self._merge_subtitles(subs, target=subtitles)
 197                 elif stream_type == 'hds':
 198                     this_formats = self._extract_f4m_formats(
 199                         watch_url, video_id, f4m_id=format_id, fatal=False)
 200                 elif stream_type == 'smooth_playready':
 201                     this_formats = self._extract_ism_formats(
 202                         watch_url, video_id, ism_id=format_id, fatal=False)
 203                 else:
 204                     assert False
 205                 for this_format in this_formats:
 206                     this_format['quality'] = preference
 207                 formats.extend(this_formats)
 208         self._sort_formats(formats)
 209         return formats, subtitles
 210
 211     def _extract_video(self, video_id, record_id=None):
 212         cid, info_dict = self._extract_cid_and_video_info(video_id)
 213         info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
 214         return info_dict
 215
 216     def _extract_live(self, channel_name):
 217         cid = self._extract_cid(channel_name, channel_name)
 218         formats, subtitles = self._extract_formats(cid, cid, is_live=True)
 219         return {
 220             'id': channel_name,
 221             'title': channel_name,
 222             'is_live': True,
 223             'formats': formats,
 224             'subtitles': subtitles
 225         }
 226
 227     def _extract_record(self, record_id):
 228         video_id = self._extract_video_id_from_recording(record_id)
 229         cid, info_dict = self._extract_cid_and_video_info(video_id)
 230         info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
 231         return info_dict
 232
 233     def _extract_ondemand(self, ondemand_id):
 234         ondemand_termtoken, ondemand_type, info_dict = self._extract_ondemand_info(ondemand_id)
 235         info_dict['formats'], info_dict['subtitles'] = self._extract_formats(
 236             None, ondemand_id, ondemand_id=ondemand_id,
 237             ondemand_termtoken=ondemand_termtoken, ondemand_type=ondemand_type)
 238         return info_dict
 239
 240     def _real_extract(self, url):
 241         vid1, vid2 = self._match_valid_url(url).group('vid1', 'vid2')
 242         return getattr(self, f'_extract_{self._TYPE}')(vid1 or vid2)
 243
 244
 245 def _make_valid_url(host):
 246     return rf'https?://(?:www\.)?{re.escape(host)}/watch/[^/]+?/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
 247
 248
 249 class ZattooBaseIE(ZattooPlatformBaseIE):
 250     _NETRC_MACHINE = 'zattoo'
 251     _HOST = 'zattoo.com'
 252
 253     @staticmethod
 254     def _create_valid_url(match, qs, base_re=None):
 255         match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)'
 256         return rf'''(?x)https?://(?:www\.)?zattoo\.com/(?:
 257             [^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
 258             {match_base}
 259         )'''
 260
 261
 262 class ZattooIE(ZattooBaseIE):
 263     _VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'program', '(?:program|watch)/[^/]+')
 264     _TYPE = 'video'
 265     _TESTS = [{
 266         'url': 'https://zattoo.com/program/zdf/250170418',
 267         'info_dict': {
 268             'id': '250170418',
 269             'ext': 'mp4',
 270             'title': 'Markus Lanz',
 271             'description': 'md5:e41cb1257de008ca62a73bb876ffa7fc',
 272             'thumbnail': 're:http://images.zattic.com/cms/.+/format_480x360.jpg',
 273             'creator': 'ZDF HD',
 274             'release_year': 2022,
 275             'episode': 'Folge 1655',
 276             'categories': 'count:1',
 277             'tags': 'count:2'
 278         },
 279         'params': {'skip_download': 'm3u8'}
 280     }, {
 281         'url': 'https://zattoo.com/program/daserste/210177916',
 282         'only_matching': True,
 283     }, {
 284         'url': 'https://zattoo.com/guide/german?channel=srf1&program=169860555',
 285         'only_matching': True,
 286     }]
 287
 288
 289 class ZattooLiveIE(ZattooBaseIE):
 290     _VALID_URL = ZattooBaseIE._create_valid_url(r'[^/?&#]+', 'channel', 'live')
 291     _TYPE = 'live'
 292     _TESTS = [{
 293         'url': 'https://zattoo.com/channels/german?channel=srf_zwei',
 294         'only_matching': True,
 295     }, {
 296         'url': 'https://zattoo.com/live/srf1',
 297         'only_matching': True,
 298     }]
 299
 300     @classmethod
 301     def suitable(cls, url):
 302         return False if ZattooIE.suitable(url) else super().suitable(url)
 303
 304
 305 class ZattooMoviesIE(ZattooBaseIE):
 306     _VALID_URL = ZattooBaseIE._create_valid_url(r'\w+', 'movie_id', 'vod/movies')
 307     _TYPE = 'ondemand'
 308     _TESTS = [{
 309         'url': 'https://zattoo.com/vod/movies/7521',
 310         'only_matching': True,
 311     }, {
 312         'url': 'https://zattoo.com/ondemand?movie_id=7521&term_token=9f00f43183269484edde',
 313         'only_matching': True,
 314     }]
 315
 316
 317 class ZattooRecordingsIE(ZattooBaseIE):
 318     _VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'recording')
 319     _TYPE = 'record'
 320     _TESTS = [{
 321         'url': 'https://zattoo.com/recordings?recording=193615508',
 322         'only_matching': True,
 323     }, {
 324         'url': 'https://zattoo.com/tc/ptc_recordings_all_recordings?recording=193615420',
 325         'only_matching': True,
 326     }]
 327
 328
 329 class NetPlusIE(ZattooPlatformBaseIE):
 330     _NETRC_MACHINE = 'netplus'
 331     _HOST = 'netplus.tv'
 332     _API_HOST = 'www.%s' % _HOST
 333     _VALID_URL = _make_valid_url(_HOST)
 334
 335     _TESTS = [{
 336         'url': 'https://www.netplus.tv/watch/abc/123-abc',
 337         'only_matching': True,
 338     }]
 339
 340
 341 class MNetTVIE(ZattooPlatformBaseIE):
 342     _NETRC_MACHINE = 'mnettv'
 343     _HOST = 'tvplus.m-net.de'
 344     _VALID_URL = _make_valid_url(_HOST)
 345
 346     _TESTS = [{
 347         'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
 348         'only_matching': True,
 349     }]
 350
 351
 352 class WalyTVIE(ZattooPlatformBaseIE):
 353     _NETRC_MACHINE = 'walytv'
 354     _HOST = 'player.waly.tv'
 355     _VALID_URL = _make_valid_url(_HOST)
 356
 357     _TESTS = [{
 358         'url': 'https://player.waly.tv/watch/abc/123-abc',
 359         'only_matching': True,
 360     }]
 361
 362
 363 class BBVTVIE(ZattooPlatformBaseIE):
 364     _NETRC_MACHINE = 'bbvtv'
 365     _HOST = 'bbv-tv.net'
 366     _API_HOST = 'www.%s' % _HOST
 367     _VALID_URL = _make_valid_url(_HOST)
 368
 369     _TESTS = [{
 370         'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
 371         'only_matching': True,
 372     }]
 373
 374
 375 class VTXTVIE(ZattooPlatformBaseIE):
 376     _NETRC_MACHINE = 'vtxtv'
 377     _HOST = 'vtxtv.ch'
 378     _API_HOST = 'www.%s' % _HOST
 379     _VALID_URL = _make_valid_url(_HOST)
 380
 381     _TESTS = [{
 382         'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
 383         'only_matching': True,
 384     }]
 385
 386
 387 class GlattvisionTVIE(ZattooPlatformBaseIE):
 388     _NETRC_MACHINE = 'glattvisiontv'
 389     _HOST = 'iptv.glattvision.ch'
 390     _VALID_URL = _make_valid_url(_HOST)
 391
 392     _TESTS = [{
 393         'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
 394         'only_matching': True,
 395     }]
 396
 397
 398 class SAKTVIE(ZattooPlatformBaseIE):
 399     _NETRC_MACHINE = 'saktv'
 400     _HOST = 'saktv.ch'
 401     _API_HOST = 'www.%s' % _HOST
 402     _VALID_URL = _make_valid_url(_HOST)
 403
 404     _TESTS = [{
 405         'url': 'https://www.saktv.ch/watch/abc/123-abc',
 406         'only_matching': True,
 407     }]
 408
 409
 410 class EWETVIE(ZattooPlatformBaseIE):
 411     _NETRC_MACHINE = 'ewetv'
 412     _HOST = 'tvonline.ewe.de'
 413     _VALID_URL = _make_valid_url(_HOST)
 414
 415     _TESTS = [{
 416         'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
 417         'only_matching': True,
 418     }]
 419
 420
 421 class QuantumTVIE(ZattooPlatformBaseIE):
 422     _NETRC_MACHINE = 'quantumtv'
 423     _HOST = 'quantum-tv.com'
 424     _API_HOST = 'www.%s' % _HOST
 425     _VALID_URL = _make_valid_url(_HOST)
 426
 427     _TESTS = [{
 428         'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
 429         'only_matching': True,
 430     }]
 431
 432
 433 class OsnatelTVIE(ZattooPlatformBaseIE):
 434     _NETRC_MACHINE = 'osnateltv'
 435     _HOST = 'tvonline.osnatel.de'
 436     _VALID_URL = _make_valid_url(_HOST)
 437
 438     _TESTS = [{
 439         'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
 440         'only_matching': True,
 441     }]
 442
 443
 444 class EinsUndEinsTVIE(ZattooPlatformBaseIE):
 445     _NETRC_MACHINE = '1und1tv'
 446     _HOST = '1und1.tv'
 447     _API_HOST = 'www.%s' % _HOST
 448     _VALID_URL = _make_valid_url(_HOST)
 449
 450     _TESTS = [{
 451         'url': 'https://www.1und1.tv/watch/abc/123-abc',
 452         'only_matching': True,
 453     }]
 454
 455
 456 class SaltTVIE(ZattooPlatformBaseIE):
 457     _NETRC_MACHINE = 'salttv'
 458     _HOST = 'tv.salt.ch'
 459     _VALID_URL = _make_valid_url(_HOST)
 460
 461     _TESTS = [{
 462         'url': 'https://tv.salt.ch/watch/abc/123-abc',
 463         'only_matching': True,
 464     }]