yt_dlp/extractor/sovietscloset.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    try_get,
    unified_timestamp,
)
   9
  10
  11 class SovietsClosetBaseIE(InfoExtractor):
  12     MEDIADELIVERY_REFERER = {'Referer': 'https://iframe.mediadelivery.net/'}
  13
  14     def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
  15         nuxt_jsonp = self._download_webpage(nuxt_jsonp_url, video_id, note=f'Downloading {name} __NUXT_JSONP__')
  16         return self._search_nuxt_data(nuxt_jsonp, video_id, '__NUXT_JSONP__')
  17
  18     def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
  19         title = game_name
  20         if category_name and category_name != 'Misc':
  21             title += f' - {category_name}'
  22         if episode_number:
  23             title += f' #{episode_number}'
  24
  25         timestamp = unified_timestamp(stream_date)
  26
  27         return {
  28             'id': video_id,
  29             'title': title,
  30             'http_headers': self.MEDIADELIVERY_REFERER,
  31             'uploader': 'SovietWomble',
  32             'creator': 'SovietWomble',
  33             'release_timestamp': timestamp,
  34             'timestamp': timestamp,
  35             'uploader_id': 'SovietWomble',
  36             'uploader_url': 'https://www.twitch.tv/SovietWomble',
  37             'was_live': True,
  38             'availability': 'public',
  39             'series': game_name,
  40             'season': category_name,
  41             'episode_number': episode_number,
  42         }
  43
  44
  45 class SovietsClosetIE(SovietsClosetBaseIE):
  46     _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/video/(?P<id>[0-9]+)/?'
  47     _TESTS = [
  48         {
  49             'url': 'https://sovietscloset.com/video/1337',
  50             'md5': '11e58781c4ca5b283307aa54db5b3f93',
  51             'info_dict': {
  52                 'id': '1337',
  53                 'ext': 'mp4',
  54                 'title': 'The Witcher #13',
  55                 'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
  56                 'uploader': 'SovietWomble',
  57                 'creator': 'SovietWomble',
  58                 'release_timestamp': 1492091580,
  59                 'release_date': '20170413',
  60                 'timestamp': 1492091580,
  61                 'upload_date': '20170413',
  62                 'uploader_id': 'SovietWomble',
  63                 'uploader_url': 'https://www.twitch.tv/SovietWomble',
  64                 'duration': 7007,
  65                 'was_live': True,
  66                 'availability': 'public',
  67                 'series': 'The Witcher',
  68                 'season': 'Misc',
  69                 'episode_number': 13,
  70             },
  71         },
  72         {
  73             'url': 'https://sovietscloset.com/video/1105',
  74             'md5': '578b1958a379e7110ba38697042e9efb',
  75             'info_dict': {
  76                 'id': '1105',
  77                 'ext': 'mp4',
  78                 'title': 'Arma 3 - Zeus Games #3',
  79                 'uploader': 'SovietWomble',
  80                 'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$',
  81                 'uploader': 'SovietWomble',
  82                 'creator': 'SovietWomble',
  83                 'release_timestamp': 1461157200,
  84                 'release_date': '20160420',
  85                 'timestamp': 1461157200,
  86                 'upload_date': '20160420',
  87                 'uploader_id': 'SovietWomble',
  88                 'uploader_url': 'https://www.twitch.tv/SovietWomble',
  89                 'duration': 8804,
  90                 'was_live': True,
  91                 'availability': 'public',
  92                 'series': 'Arma 3',
  93                 'season': 'Zeus Games',
  94                 'episode_number': 3,
  95             },
  96         },
  97     ]
  98
  99     def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
 100         iframe = self._download_webpage(
 101             f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
 102             video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER)
 103
 104         m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url')
 105         thumbnail_url = self._search_regex(r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url')
 106
 107         m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
 108         self._sort_formats(m3u8_formats)
 109
 110         if not m3u8_formats:
 111             duration = None
 112         else:
 113             duration = self._extract_m3u8_vod_duration(
 114                 m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER)
 115
 116         return {
 117             'formats': m3u8_formats,
 118             'thumbnail': thumbnail_url,
 119             'duration': duration,
 120         }
 121
 122     def _real_extract(self, url):
 123         video_id = self._match_id(url)
 124         webpage = self._download_webpage(url, video_id)
 125
 126         static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
 127         static_assets_base = f'https://sovietscloset.com{static_assets_base}'
 128
 129         stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']
 130
 131         return {
 132             **self.video_meta(
 133                 video_id=video_id, game_name=stream['game']['name'],
 134                 category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
 135                 episode_number=stream.get('number'), stream_date=stream.get('date')),
 136             **self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
 137         }
 138
 139
 140 class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
 141     _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/(?!video)(?P<id>[^#?]+)'
 142     _TESTS = [
 143
 144         {
 145             'url': 'https://sovietscloset.com/The-Witcher',
 146             'info_dict': {
 147                 'id': 'The-Witcher',
 148                 'title': 'The Witcher',
 149             },
 150             'playlist_mincount': 31,
 151         },
 152         {
 153             'url': 'https://sovietscloset.com/Arma-3/Zeus-Games',
 154             'info_dict': {
 155                 'id': 'Arma-3/Zeus-Games',
 156                 'title': 'Arma 3 - Zeus Games',
 157             },
 158             'playlist_mincount': 3,
 159         },
 160         {
 161             'url': 'https://sovietscloset.com/arma-3/zeus-games/',
 162             'info_dict': {
 163                 'id': 'arma-3/zeus-games',
 164                 'title': 'Arma 3 - Zeus Games',
 165             },
 166             'playlist_mincount': 3,
 167         },
 168         {
 169             'url': 'https://sovietscloset.com/Total-War-Warhammer',
 170             'info_dict': {
 171                 'id': 'Total-War-Warhammer',
 172                 'title': 'Total War: Warhammer - Greenskins',
 173             },
 174             'playlist_mincount': 33,
 175         },
 176     ]
 177
 178     def _real_extract(self, url):
 179         playlist_id = self._match_id(url)
 180         if playlist_id.endswith('/'):
 181             playlist_id = playlist_id[:-1]
 182
 183         webpage = self._download_webpage(url, playlist_id)
 184
 185         static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
 186         static_assets_base = f'https://sovietscloset.com{static_assets_base}'
 187
 188         sovietscloset = self.parse_nuxt_jsonp(f'{static_assets_base}/payload.js', playlist_id, 'global')['games']
 189
 190         if '/' in playlist_id:
 191             game_slug, category_slug = playlist_id.lower().split('/')
 192         else:
 193             game_slug = playlist_id.lower()
 194             category_slug = 'misc'
 195
 196         game = next(game for game in sovietscloset if game['slug'].lower() == game_slug)
 197         category = next((cat for cat in game['subcategories'] if cat.get('slug', '').lower() == category_slug),
 198                         game['subcategories'][0])
 199         category_slug = category.get('slug', '').lower() or category_slug
 200         playlist_title = game.get('name') or game_slug
 201         if category_slug != 'misc':
 202             playlist_title += f' - {category.get("name") or category_slug}'
 203         entries = [{
 204             **self.url_result(f'https://sovietscloset.com/video/{stream["id"]}', ie=SovietsClosetIE.ie_key()),
 205             **self.video_meta(
 206                 video_id=stream['id'], game_name=game['name'], category_name=category.get('name'),
 207                 episode_number=i + 1, stream_date=stream.get('date')),
 208         } for i, stream in enumerate(category['streams'])]
 209
 210         return self.playlist_result(entries, playlist_id, playlist_title)