yt_dlp/extractor/discoveryplusindia.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5 import re
   6
   7 from ..compat import compat_str
   8 from ..utils import try_get
   9 from .common import InfoExtractor
  10 from .dplay import DPlayIE
  11
  12
  13 class DiscoveryPlusIndiaIE(DPlayIE):
  14     _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/videos?' + DPlayIE._PATH_REGEX
  15     _TESTS = [{
  16         'url': 'https://www.discoveryplus.in/videos/how-do-they-do-it/fugu-and-more?seasonId=8&type=EPISODE',
  17         'info_dict': {
  18             'id': '27104',
  19             'ext': 'mp4',
  20             'display_id': 'how-do-they-do-it/fugu-and-more',
  21             'title': 'Fugu and More',
  22             'description': 'The Japanese catch, prepare and eat the deadliest fish on the planet.',
  23             'duration': 1319,
  24             'timestamp': 1582309800,
  25             'upload_date': '20200221',
  26             'series': 'How Do They Do It?',
  27             'season_number': 8,
  28             'episode_number': 2,
  29             'creator': 'Discovery Channel',
  30         },
  31         'params': {
  32             'format': 'bestvideo',
  33             'skip_download': True,
  34         },
  35         'skip': 'Cookies (not necessarily logged in) are needed'
  36     }]
  37
  38     def _update_disco_api_headers(self, headers, disco_base, display_id, realm):
  39         headers['x-disco-params'] = 'realm=%s' % realm
  40         headers['x-disco-client'] = 'WEB:UNKNOWN:dplus-india:17.0.0'
  41
  42     def _download_video_playback_info(self, disco_base, video_id, headers):
  43         return self._download_json(
  44             disco_base + 'playback/v3/videoPlaybackInfo',
  45             video_id, headers=headers, data=json.dumps({
  46                 'deviceInfo': {
  47                     'adBlocker': False,
  48                 },
  49                 'videoId': video_id,
  50             }).encode('utf-8'))['data']['attributes']['streaming']
  51
  52     def _real_extract(self, url):
  53         display_id = self._match_id(url)
  54         return self._get_disco_api_info(
  55             url, display_id, 'ap2-prod-direct.discoveryplus.in', 'dplusindia', 'in')
  56
  57
  58 class DiscoveryPlusIndiaShowIE(InfoExtractor):
  59     _VALID_URL = r'https?://(?:www\.)?discoveryplus\.in/show/(?P<show_name>[^/]+)/?(?:[?#]|$)'
  60     _TESTS = [{
  61         'url': 'https://www.discoveryplus.in/show/how-do-they-do-it',
  62         'playlist_mincount': 140,
  63         'info_dict': {
  64             'id': 'how-do-they-do-it',
  65         },
  66     }]
  67
  68     def _entries(self, show_name):
  69         headers = {
  70             'x-disco-client': 'WEB:UNKNOWN:dplus-india:prod',
  71             'x-disco-params': 'realm=dplusindia',
  72             'referer': 'https://www.discoveryplus.in/',
  73         }
  74         show_url = 'https://ap2-prod-direct.discoveryplus.in/cms/routes/show/{}?include=default'.format(show_name)
  75         show_json = self._download_json(show_url,
  76                                         video_id=show_name,
  77                                         headers=headers)['included'][4]['attributes']['component']
  78         show_id = show_json['mandatoryParams'].split('=')[-1]
  79         season_url = 'https://ap2-prod-direct.discoveryplus.in/content/videos?sort=episodeNumber&filter[seasonNumber]={}&filter[show.id]={}&page[size]=100&page[number]={}'
  80         for season in show_json['filters'][0]['options']:
  81             season_id = season['id']
  82             total_pages, page_num = 1, 0
  83             while page_num < total_pages:
  84                 season_json = self._download_json(season_url.format(season_id, show_id, compat_str(page_num + 1)),
  85                                                   video_id=show_id, headers=headers,
  86                                                   note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
  87                 if page_num == 0:
  88                     total_pages = try_get(season_json, lambda x: x['meta']['totalPages'], int) or 1
  89                 episodes_json = season_json['data']
  90                 for episode in episodes_json:
  91                     video_id = episode['attributes']['path']
  92                     yield self.url_result(
  93                         'https://discoveryplus.in/videos/%s' % video_id,
  94                         ie=DiscoveryPlusIndiaIE.ie_key(), video_id=video_id)
  95                 page_num += 1
  96
  97     def _real_extract(self, url):
  98         show_name = re.match(self._VALID_URL, url).group('show_name')
  99         return self.playlist_result(self._entries(show_name), playlist_id=show_name)