]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/sovietscloset.py
[youtube] Differentiate descriptive audio by language code
[yt-dlp.git] / yt_dlp / extractor / sovietscloset.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 try_get,
7 unified_timestamp
8 )
9
10
class SovietsClosetBaseIE(InfoExtractor):
    """Helpers shared by the sovietscloset.com extractors."""

    # Referer header; video_meta() exposes it to results as 'http_headers'
    MEDIADELIVERY_REFERER = {'Referer': 'https://iframe.mediadelivery.net/'}

    def parse_nuxt_jsonp(self, nuxt_jsonp_url, video_id, name):
        """Download `nuxt_jsonp_url` and return what _search_nuxt_data extracts from it.

        `name` is only used to label the download note.
        """
        payload = self._download_webpage(
            nuxt_jsonp_url, video_id,
            note=f'Downloading {name} __NUXT_JSONP__')
        return self._search_nuxt_data(payload, video_id, '__NUXT_JSONP__')

    def video_meta(self, video_id, game_name, category_name, episode_number, stream_date):
        """Return the info-dict fields that can be derived from stream metadata alone.

        The title is `game_name`, followed by ' - <category_name>' unless the
        category is empty or 'Misc', followed by ' #<episode_number>' when an
        episode number is given.
        """
        suffix = ''
        if category_name and category_name != 'Misc':
            suffix += f' - {category_name}'
        if episode_number:
            suffix += f' #{episode_number}'
        full_title = game_name + suffix if suffix else game_name

        # The stream date is reported as both the release and the upload time
        stream_timestamp = unified_timestamp(stream_date)
        channel = 'SovietWomble'

        return {
            'id': video_id,
            'title': full_title,
            'http_headers': self.MEDIADELIVERY_REFERER,
            'uploader': channel,
            'creator': channel,
            'release_timestamp': stream_timestamp,
            'timestamp': stream_timestamp,
            'uploader_id': channel,
            'uploader_url': 'https://www.twitch.tv/SovietWomble',
            'was_live': True,
            'availability': 'public',
            'series': game_name,
            'season': category_name,
            'episode_number': episode_number,
        }
43
44
class SovietsClosetIE(SovietsClosetBaseIE):
    """Extractor for single videos at sovietscloset.com/video/<id>."""

    _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/video/(?P<id>[0-9]+)/?'
    _TESTS = [
        {
            'url': 'https://sovietscloset.com/video/1337',
            'md5': '11e58781c4ca5b283307aa54db5b3f93',
            'info_dict': {
                'id': '1337',
                'ext': 'mp4',
                'title': 'The Witcher #13',
                'thumbnail': r're:^https?://.*\.b-cdn\.net/2f0cfbf4-3588-43a9-a7d6-7c9ea3755e67/thumbnail\.jpg$',
                'uploader': 'SovietWomble',
                'creator': 'SovietWomble',
                'release_timestamp': 1492091580,
                'release_date': '20170413',
                'timestamp': 1492091580,
                'upload_date': '20170413',
                'uploader_id': 'SovietWomble',
                'uploader_url': 'https://www.twitch.tv/SovietWomble',
                'duration': 7007,
                'was_live': True,
                'availability': 'public',
                'series': 'The Witcher',
                'season': 'Misc',
                'episode_number': 13,
            },
        },
        {
            'url': 'https://sovietscloset.com/video/1105',
            'md5': '578b1958a379e7110ba38697042e9efb',
            'info_dict': {
                'id': '1105',
                'ext': 'mp4',
                'title': 'Arma 3 - Zeus Games #3',
                'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$',
                'uploader': 'SovietWomble',
                'creator': 'SovietWomble',
                'release_timestamp': 1461157200,
                'release_date': '20160420',
                'timestamp': 1461157200,
                'upload_date': '20160420',
                'uploader_id': 'SovietWomble',
                'uploader_url': 'https://www.twitch.tv/SovietWomble',
                'duration': 8804,
                'was_live': True,
                'availability': 'public',
                'series': 'Arma 3',
                'season': 'Zeus Games',
                'episode_number': 3,
            },
        },
    ]

    def _extract_bunnycdn_iframe(self, video_id, bunnycdn_id):
        """Return 'formats', 'thumbnail' and 'duration' scraped from the BunnyCDN embed page.

        The m3u8 URL is mandatory. The thumbnail is optional, so a failed match
        yields None instead of aborting the extraction. 'duration' is None when
        no formats were found.
        """
        iframe = self._download_webpage(
            f'https://iframe.mediadelivery.net/embed/5105/{bunnycdn_id}',
            video_id, note='Downloading BunnyCDN iframe', headers=self.MEDIADELIVERY_REFERER)

        m3u8_url = self._search_regex(r'(https?://.*?\.m3u8)', iframe, 'm3u8 url')
        # Optional metadata: do not fail the whole extraction if it is missing
        thumbnail_url = self._search_regex(
            r'(https?://.*?thumbnail\.jpg)', iframe, 'thumbnail url', fatal=False)

        m3u8_formats = self._extract_m3u8_formats(m3u8_url, video_id, headers=self.MEDIADELIVERY_REFERER)
        self._sort_formats(m3u8_formats)

        duration = None
        if m3u8_formats:
            # The duration is taken from the playlist of the first format after sorting
            duration = self._extract_m3u8_vod_duration(
                m3u8_formats[0]['url'], video_id, headers=self.MEDIADELIVERY_REFERER)

        return {
            'formats': m3u8_formats,
            'thumbnail': thumbnail_url,
            'duration': duration,
        }

    def _real_extract(self, url):
        """Combine the stream metadata from the video's payload.js with the BunnyCDN media info."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The page embeds the site-relative base path of its static payload files
        static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
        static_assets_base = f'https://sovietscloset.com{static_assets_base}'

        stream = self.parse_nuxt_jsonp(f'{static_assets_base}/video/{video_id}/payload.js', video_id, 'video')['stream']

        return {
            **self.video_meta(
                video_id=video_id, game_name=stream['game']['name'],
                category_name=try_get(stream, lambda x: x['subcategory']['name'], str),
                episode_number=stream.get('number'), stream_date=stream.get('date')),
            **self._extract_bunnycdn_iframe(video_id, stream['bunnyId']),
        }
138
139
class SovietsClosetPlaylistIE(SovietsClosetBaseIE):
    """Extractor for game and game/category listings at sovietscloset.com/<game>[/<category>]."""

    _VALID_URL = r'https?://(?:www\.)?sovietscloset\.com/(?!video)(?P<id>[^#?]+)'
    _TESTS = [

        {
            'url': 'https://sovietscloset.com/The-Witcher',
            'info_dict': {
                'id': 'The-Witcher',
                'title': 'The Witcher',
            },
            'playlist_mincount': 31,
        },
        {
            'url': 'https://sovietscloset.com/Arma-3/Zeus-Games',
            'info_dict': {
                'id': 'Arma-3/Zeus-Games',
                'title': 'Arma 3 - Zeus Games',
            },
            'playlist_mincount': 3,
        },
        {
            'url': 'https://sovietscloset.com/arma-3/zeus-games/',
            'info_dict': {
                'id': 'arma-3/zeus-games',
                'title': 'Arma 3 - Zeus Games',
            },
            'playlist_mincount': 3,
        },
        {
            'url': 'https://sovietscloset.com/Total-War-Warhammer',
            'info_dict': {
                'id': 'Total-War-Warhammer',
                'title': 'Total War: Warhammer - Greenskins',
            },
            'playlist_mincount': 33,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist of the streams in the requested game category.

        The URL path is '<game>' or '<game>/<category>', matched case-insensitively
        against the slugs in the global payload.js. Without a category, 'misc' is
        looked up; if the wanted category does not exist, the game's first
        subcategory is used instead. Raises StopIteration when no game has the
        requested slug.
        """
        playlist_id = self._match_id(url)
        if playlist_id.endswith('/'):
            playlist_id = playlist_id[:-1]

        webpage = self._download_webpage(url, playlist_id)

        # The page embeds the site-relative base path of its static payload files
        static_assets_base = self._search_regex(r'staticAssetsBase:\"(.*?)\"', webpage, 'staticAssetsBase')
        static_assets_base = f'https://sovietscloset.com{static_assets_base}'

        sovietscloset = self.parse_nuxt_jsonp(f'{static_assets_base}/payload.js', playlist_id, 'global')['games']

        # partition() instead of split(): a path with extra slashes must not break
        # the two-name unpacking; the remainder simply fails to match any category
        game_slug, separator, category_slug = playlist_id.lower().partition('/')
        if not separator:
            category_slug = 'misc'

        game = next(game for game in sovietscloset if game['slug'].lower() == game_slug)
        subcategories = game['subcategories']
        # `or ''` also covers a slug that is present but None
        category = next(
            (cat for cat in subcategories if (cat.get('slug') or '').lower() == category_slug),
            subcategories[0])
        # Use the slug of the category actually selected, which may be the fallback
        category_slug = (category.get('slug') or '').lower() or category_slug

        playlist_title = game.get('name') or game_slug
        if category_slug != 'misc':
            playlist_title += f' - {category.get("name") or category_slug}'

        # Episode numbers are the 1-based positions of the streams within the category
        entries = [{
            **self.url_result(f'https://sovietscloset.com/video/{stream["id"]}', ie=SovietsClosetIE.ie_key()),
            **self.video_meta(
                video_id=stream['id'], game_name=game['name'], category_name=category.get('name'),
                episode_number=i + 1, stream_date=stream.get('date')),
        } for i, stream in enumerate(category['streams'])]

        return self.playlist_result(entries, playlist_id, playlist_title)