yt_dlp/extractor/zattoo.py

   1 import re
   2 from uuid import uuid4
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_HTTPError, compat_str
   6 from ..utils import (
   7     ExtractorError,
   8     int_or_none,
   9     join_nonempty,
  10     try_get,
  11     url_or_none,
  12     urlencode_postdata,
  13 )
  14
  15
  16 class ZattooPlatformBaseIE(InfoExtractor):
  17     _power_guide_hash = None
  18
  19     def _host_url(self):
  20         return 'https://%s' % (self._API_HOST if hasattr(self, '_API_HOST') else self._HOST)
  21
  22     def _real_initialize(self):
  23         if not self._power_guide_hash:
  24             self.raise_login_required('An account is needed to access this media', method='password')
  25
  26     def _perform_login(self, username, password):
  27         try:
  28             data = self._download_json(
  29                 '%s/zapi/v2/account/login' % self._host_url(), None, 'Logging in',
  30                 data=urlencode_postdata({
  31                     'login': username,
  32                     'password': password,
  33                     'remember': 'true',
  34                 }), headers={
  35                     'Referer': '%s/login' % self._host_url(),
  36                     'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
  37                 })
  38         except ExtractorError as e:
  39             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
  40                 raise ExtractorError(
  41                     'Unable to login: incorrect username and/or password',
  42                     expected=True)
  43             raise
  44
  45         self._power_guide_hash = data['session']['power_guide_hash']
  46
  47     def _initialize_pre_login(self):
  48         session_token = self._download_json(
  49             f'{self._host_url()}/token.json', None, 'Downloading session token')['session_token']
  50
  51         # Will setup appropriate cookies
  52         self._request_webpage(
  53             '%s/zapi/v3/session/hello' % self._host_url(), None,
  54             'Opening session', data=urlencode_postdata({
  55                 'uuid': compat_str(uuid4()),
  56                 'lang': 'en',
  57                 'app_version': '1.8.2',
  58                 'format': 'json',
  59                 'client_app_token': session_token,
  60             }))
  61
  62     def _extract_video_id_from_recording(self, recid):
  63         playlist = self._download_json(
  64             f'{self._host_url()}/zapi/v2/playlist', recid, 'Downloading playlist')
  65         try:
  66             return next(
  67                 str(item['program_id']) for item in playlist['recordings']
  68                 if item.get('program_id') and str(item.get('id')) == recid)
  69         except (StopIteration, KeyError):
  70             raise ExtractorError('Could not extract video id from recording')
  71
  72     def _extract_cid(self, video_id, channel_name):
  73         channel_groups = self._download_json(
  74             '%s/zapi/v2/cached/channels/%s' % (self._host_url(),
  75                                                self._power_guide_hash),
  76             video_id, 'Downloading channel list',
  77             query={'details': False})['channel_groups']
  78         channel_list = []
  79         for chgrp in channel_groups:
  80             channel_list.extend(chgrp['channels'])
  81         try:
  82             return next(
  83                 chan['cid'] for chan in channel_list
  84                 if chan.get('cid') and (
  85                     chan.get('display_alias') == channel_name
  86                     or chan.get('cid') == channel_name))
  87         except StopIteration:
  88             raise ExtractorError('Could not extract channel id')
  89
  90     def _extract_cid_and_video_info(self, video_id):
  91         data = self._download_json(
  92             '%s/zapi/v2/cached/program/power_details/%s' % (
  93                 self._host_url(), self._power_guide_hash),
  94             video_id,
  95             'Downloading video information',
  96             query={
  97                 'program_ids': video_id,
  98                 'complete': True,
  99             })
 100
 101         p = data['programs'][0]
 102         cid = p['cid']
 103
 104         info_dict = {
 105             'id': video_id,
 106             'title': p.get('t') or p['et'],
 107             'description': p.get('d'),
 108             'thumbnail': p.get('i_url'),
 109             'creator': p.get('channel_name'),
 110             'episode': p.get('et'),
 111             'episode_number': int_or_none(p.get('e_no')),
 112             'season_number': int_or_none(p.get('s_no')),
 113             'release_year': int_or_none(p.get('year')),
 114             'categories': try_get(p, lambda x: x['c'], list),
 115             'tags': try_get(p, lambda x: x['g'], list)
 116         }
 117
 118         return cid, info_dict
 119
 120     def _extract_ondemand_info(self, ondemand_id):
 121         """
 122         @returns    (ondemand_token, ondemand_type, info_dict)
 123         """
 124         data = self._download_json(
 125             '%s/zapi/vod/movies/%s' % (self._host_url(), ondemand_id),
 126             ondemand_id, 'Downloading ondemand information')
 127         info_dict = {
 128             'id': ondemand_id,
 129             'title': data.get('title'),
 130             'description': data.get('description'),
 131             'duration': int_or_none(data.get('duration')),
 132             'release_year': int_or_none(data.get('year')),
 133             'episode_number': int_or_none(data.get('episode_number')),
 134             'season_number': int_or_none(data.get('season_number')),
 135             'categories': try_get(data, lambda x: x['categories'], list),
 136         }
 137         return data['terms_catalog'][0]['terms'][0]['token'], data['type'], info_dict
 138
 139     def _extract_formats(self, cid, video_id, record_id=None, ondemand_id=None, ondemand_termtoken=None, ondemand_type=None, is_live=False):
 140         postdata_common = {
 141             'https_watch_urls': True,
 142         }
 143
 144         if is_live:
 145             postdata_common.update({'timeshift': 10800})
 146             url = '%s/zapi/watch/live/%s' % (self._host_url(), cid)
 147         elif record_id:
 148             url = '%s/zapi/watch/recording/%s' % (self._host_url(), record_id)
 149         elif ondemand_id:
 150             postdata_common.update({
 151                 'teasable_id': ondemand_id,
 152                 'term_token': ondemand_termtoken,
 153                 'teasable_type': ondemand_type
 154             })
 155             url = '%s/zapi/watch/vod/video' % self._host_url()
 156         else:
 157             url = '%s/zapi/v3/watch/replay/%s/%s' % (self._host_url(), cid, video_id)
 158         formats = []
 159         subtitles = {}
 160         for stream_type in ('dash', 'hls7'):
 161             postdata = postdata_common.copy()
 162             postdata['stream_type'] = stream_type
 163
 164             data = self._download_json(
 165                 url, video_id, 'Downloading %s formats' % stream_type.upper(),
 166                 data=urlencode_postdata(postdata), fatal=False)
 167             if not data:
 168                 continue
 169
 170             watch_urls = try_get(
 171                 data, lambda x: x['stream']['watch_urls'], list)
 172             if not watch_urls:
 173                 continue
 174
 175             for watch in watch_urls:
 176                 if not isinstance(watch, dict):
 177                     continue
 178                 watch_url = url_or_none(watch.get('url'))
 179                 if not watch_url:
 180                     continue
 181                 audio_channel = watch.get('audio_channel')
 182                 preference = 1 if audio_channel == 'A' else None
 183                 format_id = join_nonempty(stream_type, watch.get('maxrate'), audio_channel)
 184                 if stream_type.startswith('dash'):
 185                     this_formats, subs = self._extract_mpd_formats_and_subtitles(
 186                         watch_url, video_id, mpd_id=format_id, fatal=False)
 187                     self._merge_subtitles(subs, target=subtitles)
 188                 elif stream_type.startswith('hls'):
 189                     this_formats, subs = self._extract_m3u8_formats_and_subtitles(
 190                         watch_url, video_id, 'mp4',
 191                         entry_protocol='m3u8_native', m3u8_id=format_id,
 192                         fatal=False)
 193                     self._merge_subtitles(subs, target=subtitles)
 194                 elif stream_type == 'hds':
 195                     this_formats = self._extract_f4m_formats(
 196                         watch_url, video_id, f4m_id=format_id, fatal=False)
 197                 elif stream_type == 'smooth_playready':
 198                     this_formats = self._extract_ism_formats(
 199                         watch_url, video_id, ism_id=format_id, fatal=False)
 200                 else:
 201                     assert False
 202                 for this_format in this_formats:
 203                     this_format['quality'] = preference
 204                 formats.extend(this_formats)
 205         self._sort_formats(formats)
 206         return formats, subtitles
 207
 208     def _extract_video(self, video_id, record_id=None):
 209         cid, info_dict = self._extract_cid_and_video_info(video_id)
 210         info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
 211         return info_dict
 212
 213     def _extract_live(self, channel_name):
 214         cid = self._extract_cid(channel_name, channel_name)
 215         formats, subtitles = self._extract_formats(cid, cid, is_live=True)
 216         return {
 217             'id': channel_name,
 218             'title': channel_name,
 219             'is_live': True,
 220             'formats': formats,
 221             'subtitles': subtitles
 222         }
 223
 224     def _extract_record(self, record_id):
 225         video_id = self._extract_video_id_from_recording(record_id)
 226         cid, info_dict = self._extract_cid_and_video_info(video_id)
 227         info_dict['formats'], info_dict['subtitles'] = self._extract_formats(cid, video_id, record_id=record_id)
 228         return info_dict
 229
 230     def _extract_ondemand(self, ondemand_id):
 231         ondemand_termtoken, ondemand_type, info_dict = self._extract_ondemand_info(ondemand_id)
 232         info_dict['formats'], info_dict['subtitles'] = self._extract_formats(
 233             None, ondemand_id, ondemand_id=ondemand_id,
 234             ondemand_termtoken=ondemand_termtoken, ondemand_type=ondemand_type)
 235         return info_dict
 236
 237     def _real_extract(self, url):
 238         video_id, record_id = self._match_valid_url(url).groups()
 239         return self._extract_video(video_id, record_id)
 240
 241
 242 def _make_valid_url(host):
 243     return rf'https?://(?:www\.)?{re.escape(host)}/watch/[^/]+?/(?P<id>[0-9]+)[^/]+(?:/(?P<recid>[0-9]+))?'
 244
 245
 246 class ZattooBaseIE(ZattooPlatformBaseIE):
 247     _NETRC_MACHINE = 'zattoo'
 248     _HOST = 'zattoo.com'
 249
 250     @staticmethod
 251     def _create_valid_url(match, qs, base_re=None):
 252         match_base = fr'|{base_re}/(?P<vid1>{match})' if base_re else '(?P<vid1>)'
 253         return rf'''(?x)https?://(?:www\.)?zattoo\.com/(?:
 254             [^?#]+\?(?:[^#]+&)?{qs}=(?P<vid2>{match})
 255             {match_base}
 256         )'''
 257
 258     def _real_extract(self, url):
 259         vid1, vid2 = self._match_valid_url(url).group('vid1', 'vid2')
 260         return getattr(self, f'_extract_{self._TYPE}')(vid1 or vid2)
 261
 262
 263 class ZattooIE(ZattooBaseIE):
 264     _VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'program', '(?:program|watch)/[^/]+')
 265     _TYPE = 'video'
 266     _TESTS = [{
 267         'url': 'https://zattoo.com/program/zdf/250170418',
 268         'info_dict': {
 269             'id': '250170418',
 270             'ext': 'mp4',
 271             'title': 'Markus Lanz',
 272             'description': 'md5:e41cb1257de008ca62a73bb876ffa7fc',
 273             'thumbnail': 're:http://images.zattic.com/cms/.+/format_480x360.jpg',
 274             'creator': 'ZDF HD',
 275             'release_year': 2022,
 276             'episode': 'Folge 1655',
 277             'categories': 'count:1',
 278             'tags': 'count:2'
 279         },
 280         'params': {'skip_download': 'm3u8'}
 281     }, {
 282         'url': 'https://zattoo.com/program/daserste/210177916',
 283         'only_matching': True,
 284     }, {
 285         'url': 'https://zattoo.com/guide/german?channel=srf1&program=169860555',
 286         'only_matching': True,
 287     }]
 288
 289
 290 class ZattooLiveIE(ZattooBaseIE):
 291     _VALID_URL = ZattooBaseIE._create_valid_url(r'[^/?&#]+', 'channel', 'live')
 292     _TYPE = 'live'
 293     _TESTS = [{
 294         'url': 'https://zattoo.com/channels/german?channel=srf_zwei',
 295         'only_matching': True,
 296     }, {
 297         'url': 'https://zattoo.com/live/srf1',
 298         'only_matching': True,
 299     }]
 300
 301     @classmethod
 302     def suitable(cls, url):
 303         return False if ZattooIE.suitable(url) else super().suitable(url)
 304
 305
 306 class ZattooMoviesIE(ZattooBaseIE):
 307     _VALID_URL = ZattooBaseIE._create_valid_url(r'\w+', 'movie_id', 'vod/movies')
 308     _TYPE = 'ondemand'
 309     _TESTS = [{
 310         'url': 'https://zattoo.com/vod/movies/7521',
 311         'only_matching': True,
 312     }, {
 313         'url': 'https://zattoo.com/ondemand?movie_id=7521&term_token=9f00f43183269484edde',
 314         'only_matching': True,
 315     }]
 316
 317
 318 class ZattooRecordingsIE(ZattooBaseIE):
 319     _VALID_URL = ZattooBaseIE._create_valid_url(r'\d+', 'recording')
 320     _TYPE = 'record'
 321     _TESTS = [{
 322         'url': 'https://zattoo.com/recordings?recording=193615508',
 323         'only_matching': True,
 324     }, {
 325         'url': 'https://zattoo.com/tc/ptc_recordings_all_recordings?recording=193615420',
 326         'only_matching': True,
 327     }]
 328
 329
 330 class NetPlusIE(ZattooPlatformBaseIE):
 331     _NETRC_MACHINE = 'netplus'
 332     _HOST = 'netplus.tv'
 333     _API_HOST = 'www.%s' % _HOST
 334     _VALID_URL = _make_valid_url(_HOST)
 335
 336     _TESTS = [{
 337         'url': 'https://www.netplus.tv/watch/abc/123-abc',
 338         'only_matching': True,
 339     }]
 340
 341
 342 class MNetTVIE(ZattooPlatformBaseIE):
 343     _NETRC_MACHINE = 'mnettv'
 344     _HOST = 'tvplus.m-net.de'
 345     _VALID_URL = _make_valid_url(_HOST)
 346
 347     _TESTS = [{
 348         'url': 'https://tvplus.m-net.de/watch/abc/123-abc',
 349         'only_matching': True,
 350     }]
 351
 352
 353 class WalyTVIE(ZattooPlatformBaseIE):
 354     _NETRC_MACHINE = 'walytv'
 355     _HOST = 'player.waly.tv'
 356     _VALID_URL = _make_valid_url(_HOST)
 357
 358     _TESTS = [{
 359         'url': 'https://player.waly.tv/watch/abc/123-abc',
 360         'only_matching': True,
 361     }]
 362
 363
 364 class BBVTVIE(ZattooPlatformBaseIE):
 365     _NETRC_MACHINE = 'bbvtv'
 366     _HOST = 'bbv-tv.net'
 367     _API_HOST = 'www.%s' % _HOST
 368     _VALID_URL = _make_valid_url(_HOST)
 369
 370     _TESTS = [{
 371         'url': 'https://www.bbv-tv.net/watch/abc/123-abc',
 372         'only_matching': True,
 373     }]
 374
 375
 376 class VTXTVIE(ZattooPlatformBaseIE):
 377     _NETRC_MACHINE = 'vtxtv'
 378     _HOST = 'vtxtv.ch'
 379     _API_HOST = 'www.%s' % _HOST
 380     _VALID_URL = _make_valid_url(_HOST)
 381
 382     _TESTS = [{
 383         'url': 'https://www.vtxtv.ch/watch/abc/123-abc',
 384         'only_matching': True,
 385     }]
 386
 387
 388 class GlattvisionTVIE(ZattooPlatformBaseIE):
 389     _NETRC_MACHINE = 'glattvisiontv'
 390     _HOST = 'iptv.glattvision.ch'
 391     _VALID_URL = _make_valid_url(_HOST)
 392
 393     _TESTS = [{
 394         'url': 'https://iptv.glattvision.ch/watch/abc/123-abc',
 395         'only_matching': True,
 396     }]
 397
 398
 399 class SAKTVIE(ZattooPlatformBaseIE):
 400     _NETRC_MACHINE = 'saktv'
 401     _HOST = 'saktv.ch'
 402     _API_HOST = 'www.%s' % _HOST
 403     _VALID_URL = _make_valid_url(_HOST)
 404
 405     _TESTS = [{
 406         'url': 'https://www.saktv.ch/watch/abc/123-abc',
 407         'only_matching': True,
 408     }]
 409
 410
 411 class EWETVIE(ZattooPlatformBaseIE):
 412     _NETRC_MACHINE = 'ewetv'
 413     _HOST = 'tvonline.ewe.de'
 414     _VALID_URL = _make_valid_url(_HOST)
 415
 416     _TESTS = [{
 417         'url': 'https://tvonline.ewe.de/watch/abc/123-abc',
 418         'only_matching': True,
 419     }]
 420
 421
 422 class QuantumTVIE(ZattooPlatformBaseIE):
 423     _NETRC_MACHINE = 'quantumtv'
 424     _HOST = 'quantum-tv.com'
 425     _API_HOST = 'www.%s' % _HOST
 426     _VALID_URL = _make_valid_url(_HOST)
 427
 428     _TESTS = [{
 429         'url': 'https://www.quantum-tv.com/watch/abc/123-abc',
 430         'only_matching': True,
 431     }]
 432
 433
 434 class OsnatelTVIE(ZattooPlatformBaseIE):
 435     _NETRC_MACHINE = 'osnateltv'
 436     _HOST = 'tvonline.osnatel.de'
 437     _VALID_URL = _make_valid_url(_HOST)
 438
 439     _TESTS = [{
 440         'url': 'https://tvonline.osnatel.de/watch/abc/123-abc',
 441         'only_matching': True,
 442     }]
 443
 444
 445 class EinsUndEinsTVIE(ZattooPlatformBaseIE):
 446     _NETRC_MACHINE = '1und1tv'
 447     _HOST = '1und1.tv'
 448     _API_HOST = 'www.%s' % _HOST
 449     _VALID_URL = _make_valid_url(_HOST)
 450
 451     _TESTS = [{
 452         'url': 'https://www.1und1.tv/watch/abc/123-abc',
 453         'only_matching': True,
 454     }]
 455
 456
 457 class SaltTVIE(ZattooPlatformBaseIE):
 458     _NETRC_MACHINE = 'salttv'
 459     _HOST = 'tv.salt.ch'
 460     _VALID_URL = _make_valid_url(_HOST)
 461
 462     _TESTS = [{
 463         'url': 'https://tv.salt.ch/watch/abc/123-abc',
 464         'only_matching': True,
 465     }]