yt_dlp/extractor/dplay.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5 import re
   6
   7 from .common import InfoExtractor
   8 from ..compat import compat_HTTPError
   9 from ..utils import (
  10     determine_ext,
  11     ExtractorError,
  12     float_or_none,
  13     int_or_none,
  14     strip_or_none,
  15     unified_timestamp,
  16 )
  17
  18
  19 class DPlayIE(InfoExtractor):
  20     _PATH_REGEX = r'/(?P<id>[^/]+/[^/?#]+)'
  21     _VALID_URL = r'''(?x)https?://
  22         (?P<domain>
  23             (?:www\.)?(?P<host>d
  24                 (?:
  25                     play\.(?P<country>dk|fi|jp|se|no)|
  26                     iscoveryplus\.(?P<plus_country>dk|es|fi|it|se|no)
  27                 )
  28             )|
  29             (?P<subdomain_country>es|it)\.dplay\.com
  30         )/[^/]+''' + _PATH_REGEX
  31
  32     _TESTS = [{
  33         # non geo restricted, via secure api, unsigned download hls URL
  34         'url': 'https://www.dplay.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  35         'info_dict': {
  36             'id': '13628',
  37             'display_id': 'nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
  38             'ext': 'mp4',
  39             'title': 'Svensken lär sig njuta av livet',
  40             'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
  41             'duration': 2649.856,
  42             'timestamp': 1365453720,
  43             'upload_date': '20130408',
  44             'creator': 'Kanal 5',
  45             'series': 'Nugammalt - 77 händelser som format Sverige',
  46             'season_number': 1,
  47             'episode_number': 1,
  48         },
  49         'params': {
  50             'format': 'bestvideo',
  51             'skip_download': True,
  52         },
  53     }, {
  54         # geo restricted, via secure api, unsigned download hls URL
  55         'url': 'http://www.dplay.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  56         'info_dict': {
  57             'id': '104465',
  58             'display_id': 'ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
  59             'ext': 'mp4',
  60             'title': 'Ted Bundy: Mind Of A Monster',
  61             'description': 'md5:8b780f6f18de4dae631668b8a9637995',
  62             'duration': 5290.027,
  63             'timestamp': 1570694400,
  64             'upload_date': '20191010',
  65             'creator': 'ID - Investigation Discovery',
  66             'series': 'Ted Bundy: Mind Of A Monster',
  67             'season_number': 1,
  68             'episode_number': 1,
  69         },
  70         'params': {
  71             'format': 'bestvideo',
  72             'skip_download': True,
  73         },
  74     }, {
  75         # disco-api
  76         'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
  77         'info_dict': {
  78             'id': '40206',
  79             'display_id': 'i-kongens-klr/sesong-1-episode-7',
  80             'ext': 'mp4',
  81             'title': 'Episode 7',
  82             'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
  83             'duration': 2611.16,
  84             'timestamp': 1516726800,
  85             'upload_date': '20180123',
  86             'series': 'I kongens klær',
  87             'season_number': 1,
  88             'episode_number': 7,
  89         },
  90         'params': {
  91             'format': 'bestvideo',
  92             'skip_download': True,
  93         },
  94         'skip': 'Available for Premium users',
  95     }, {
  96         'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
  97         'md5': '2b808ffb00fc47b884a172ca5d13053c',
  98         'info_dict': {
  99             'id': '6918',
 100             'display_id': 'biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
 101             'ext': 'mp4',
 102             'title': 'Luigi Di Maio: la psicosi di Stanislawskij',
 103             'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
 104             'thumbnail': r're:^https?://.*\.jpe?g',
 105             'upload_date': '20160524',
 106             'timestamp': 1464076800,
 107             'series': 'Biografie imbarazzanti',
 108             'season_number': 1,
 109             'episode': 'Episode 1',
 110             'episode_number': 1,
 111         },
 112     }, {
 113         'url': 'https://es.dplay.com/dmax/la-fiebre-del-oro/temporada-8-episodio-1/',
 114         'info_dict': {
 115             'id': '21652',
 116             'display_id': 'la-fiebre-del-oro/temporada-8-episodio-1',
 117             'ext': 'mp4',
 118             'title': 'Episodio 1',
 119             'description': 'md5:b9dcff2071086e003737485210675f69',
 120             'thumbnail': r're:^https?://.*\.png',
 121             'upload_date': '20180709',
 122             'timestamp': 1531173540,
 123             'series': 'La fiebre del oro',
 124             'season_number': 8,
 125             'episode': 'Episode 1',
 126             'episode_number': 1,
 127         },
 128         'params': {
 129             'skip_download': True,
 130         },
 131     }, {
 132         'url': 'https://www.dplay.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
 133         'only_matching': True,
 134     }, {
 135         'url': 'https://www.dplay.jp/video/gold-rush/24086',
 136         'only_matching': True,
 137     }, {
 138         'url': 'https://www.discoveryplus.se/videos/nugammalt-77-handelser-som-format-sverige/nugammalt-77-handelser-som-format-sverige-101',
 139         'only_matching': True,
 140     }, {
 141         'url': 'https://www.discoveryplus.dk/videoer/ted-bundy-mind-of-a-monster/ted-bundy-mind-of-a-monster',
 142         'only_matching': True,
 143     }, {
 144         'url': 'https://www.discoveryplus.no/videoer/i-kongens-klr/sesong-1-episode-7',
 145         'only_matching': True,
 146     }, {
 147         'url': 'https://www.discoveryplus.it/videos/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij',
 148         'only_matching': True,
 149     }, {
 150         'url': 'https://www.discoveryplus.es/videos/la-fiebre-del-oro/temporada-8-episodio-1',
 151         'only_matching': True,
 152     }, {
 153         'url': 'https://www.discoveryplus.fi/videot/shifting-gears-with-aaron-kaufman/episode-16',
 154         'only_matching': True,
 155     }]
 156
 157     def _process_errors(self, e, geo_countries):
 158         info = self._parse_json(e.cause.read().decode('utf-8'), None)
 159         error = info['errors'][0]
 160         error_code = error.get('code')
 161         if error_code == 'access.denied.geoblocked':
 162             self.raise_geo_restricted(countries=geo_countries)
 163         elif error_code in ('access.denied.missingpackage', 'invalid.token'):
 164             raise ExtractorError(
 165                 'This video is only available for registered users. You may want to use --cookies.', expected=True)
 166         raise ExtractorError(info['errors'][0]['detail'], expected=True)
 167
 168     def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
 169         headers['Authorization'] = 'Bearer ' + self._download_json(
 170             disco_base + 'token', display_id, 'Downloading token',
 171             query={
 172                 'realm': realm,
 173             })['data']['attributes']['token']
 174
 175     def _download_video_playback_info(self, disco_base, video_id, headers):
 176         streaming = self._download_json(
 177             disco_base + 'playback/videoPlaybackInfo/' + video_id,
 178             video_id, headers=headers)['data']['attributes']['streaming']
 179         streaming_list = []
 180         for format_id, format_dict in streaming.items():
 181             streaming_list.append({
 182                 'type': format_id,
 183                 'url': format_dict.get('url'),
 184             })
 185         return streaming_list
 186
 187     def _get_disco_api_info(self, url, display_id, disco_host, realm, country):
 188         geo_countries = [country.upper()]
 189         self._initialize_geo_bypass({
 190             'countries': geo_countries,
 191         })
 192         disco_base = 'https://%s/' % disco_host
 193         headers = {
 194             'Referer': url,
 195         }
 196         self._update_disco_api_headers(headers, disco_base, display_id, realm)
 197         try:
 198             video = self._download_json(
 199                 disco_base + 'content/videos/' + display_id, display_id,
 200                 headers=headers, query={
 201                     'fields[channel]': 'name',
 202                     'fields[image]': 'height,src,width',
 203                     'fields[show]': 'name',
 204                     'fields[tag]': 'name',
 205                     'fields[video]': 'description,episodeNumber,name,publishStart,seasonNumber,videoDuration',
 206                     'include': 'images,primaryChannel,show,tags'
 207                 })
 208         except ExtractorError as e:
 209             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
 210                 self._process_errors(e, geo_countries)
 211             raise
 212         video_id = video['data']['id']
 213         info = video['data']['attributes']
 214         title = info['name'].strip()
 215         formats = []
 216         try:
 217             streaming = self._download_video_playback_info(
 218                 disco_base, video_id, headers)
 219         except ExtractorError as e:
 220             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
 221                 self._process_errors(e, geo_countries)
 222             raise
 223         for format_dict in streaming:
 224             if not isinstance(format_dict, dict):
 225                 continue
 226             format_url = format_dict.get('url')
 227             if not format_url:
 228                 continue
 229             format_id = format_dict.get('type')
 230             ext = determine_ext(format_url)
 231             if format_id == 'dash' or ext == 'mpd':
 232                 formats.extend(self._extract_mpd_formats(
 233                     format_url, display_id, mpd_id='dash', fatal=False))
 234             elif format_id == 'hls' or ext == 'm3u8':
 235                 formats.extend(self._extract_m3u8_formats(
 236                     format_url, display_id, 'mp4',
 237                     entry_protocol='m3u8_native', m3u8_id='hls',
 238                     fatal=False))
 239             else:
 240                 formats.append({
 241                     'url': format_url,
 242                     'format_id': format_id,
 243                 })
 244         self._sort_formats(formats)
 245
 246         creator = series = None
 247         tags = []
 248         thumbnails = []
 249         included = video.get('included') or []
 250         if isinstance(included, list):
 251             for e in included:
 252                 attributes = e.get('attributes')
 253                 if not attributes:
 254                     continue
 255                 e_type = e.get('type')
 256                 if e_type == 'channel':
 257                     creator = attributes.get('name')
 258                 elif e_type == 'image':
 259                     src = attributes.get('src')
 260                     if src:
 261                         thumbnails.append({
 262                             'url': src,
 263                             'width': int_or_none(attributes.get('width')),
 264                             'height': int_or_none(attributes.get('height')),
 265                         })
 266                 if e_type == 'show':
 267                     series = attributes.get('name')
 268                 elif e_type == 'tag':
 269                     name = attributes.get('name')
 270                     if name:
 271                         tags.append(name)
 272
 273         return {
 274             'id': video_id,
 275             'display_id': display_id,
 276             'title': title,
 277             'description': strip_or_none(info.get('description')),
 278             'duration': float_or_none(info.get('videoDuration'), 1000),
 279             'timestamp': unified_timestamp(info.get('publishStart')),
 280             'series': series,
 281             'season_number': int_or_none(info.get('seasonNumber')),
 282             'episode_number': int_or_none(info.get('episodeNumber')),
 283             'creator': creator,
 284             'tags': tags,
 285             'thumbnails': thumbnails,
 286             'formats': formats,
 287         }
 288
 289     def _real_extract(self, url):
 290         mobj = re.match(self._VALID_URL, url)
 291         display_id = mobj.group('id')
 292         domain = mobj.group('domain').lstrip('www.')
 293         country = mobj.group('country') or mobj.group('subdomain_country') or mobj.group('plus_country')
 294         host = 'disco-api.' + domain if domain[0] == 'd' else 'eu2-prod.disco-api.com'
 295         return self._get_disco_api_info(
 296             url, display_id, host, 'dplay' + country, country)
 297
 298
 299 class DiscoveryPlusIE(DPlayIE):
 300     _VALID_URL = r'https?://(?:www\.)?discoveryplus\.com/video' + DPlayIE._PATH_REGEX
 301     _TESTS = [{
 302         'url': 'https://www.discoveryplus.com/video/property-brothers-forever-home/food-and-family',
 303         'info_dict': {
 304             'id': '1140794',
 305             'display_id': 'property-brothers-forever-home/food-and-family',
 306             'ext': 'mp4',
 307             'title': 'Food and Family',
 308             'description': 'The brothers help a Richmond family expand their single-level home.',
 309             'duration': 2583.113,
 310             'timestamp': 1609304400,
 311             'upload_date': '20201230',
 312             'creator': 'HGTV',
 313             'series': 'Property Brothers: Forever Home',
 314             'season_number': 1,
 315             'episode_number': 1,
 316         },
 317         'skip': 'Available for Premium users',
 318     }]
 319
 320     def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
 321         headers['x-disco-client'] = 'WEB:UNKNOWN:dplus_us:15.0.0'
 322
 323     def _download_video_playback_info(self, disco_base, video_id, headers):
 324         return self._download_json(
 325             disco_base + 'playback/v3/videoPlaybackInfo',
 326             video_id, headers=headers, data=json.dumps({
 327                 'deviceInfo': {
 328                     'adBlocker': False,
 329                 },
 330                 'videoId': video_id,
 331                 'wisteriaProperties': {
 332                     'platform': 'desktop',
 333                 },
 334             }).encode('utf-8'))['data']['attributes']['streaming']
 335
 336     def _real_extract(self, url):
 337         display_id = self._match_id(url)
 338         return self._get_disco_api_info(
 339             url, display_id, 'us1-prod-direct.discoveryplus.com', 'go', 'us')
 340
 341
 342 class HGTVDeIE(DPlayIE):
 343     _VALID_URL = r'https?://de\.hgtv\.com/sendungen' + DPlayIE._PATH_REGEX
 344     _TESTS = [{
 345         'url': 'https://de.hgtv.com/sendungen/tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette/',
 346         'info_dict': {
 347             'id': '151205',
 348             'display_id': 'tiny-house-klein-aber-oho/wer-braucht-schon-eine-toilette',
 349             'ext': 'mp4',
 350             'title': 'Wer braucht schon eine Toilette',
 351             'description': 'md5:05b40a27e7aed2c9172de34d459134e2',
 352             'duration': 1177.024,
 353             'timestamp': 1595705400,
 354             'upload_date': '20200725',
 355             'creator': 'HGTV',
 356             'series': 'Tiny House - klein, aber oho',
 357             'season_number': 3,
 358             'episode_number': 3,
 359         },
 360         'params': {
 361             'format': 'bestvideo',
 362         },
 363     }]
 364
 365     def _real_extract(self, url):
 366         display_id = self._match_id(url)
 367         return self._get_disco_api_info(
 368             url, display_id, 'eu1-prod.disco-api.com', 'hgtv', 'de')