]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitch.py
[spotify] Detect iframe embeds (#3430)
[yt-dlp.git] / yt_dlp / extractor / twitch.py
CommitLineData
841b6838 1import collections
3182f3e2 2import itertools
48afc6ca 3import json
841b6838
S
4import random
5import re
79e93125
PH
6
7from .common import InfoExtractor
1cc79574 8from ..compat import (
e704f87f 9 compat_parse_qs,
240b9b7a 10 compat_str,
15707c7e 11 compat_urllib_parse_urlencode,
e704f87f 12 compat_urllib_parse_urlparse,
1cc79574
PH
13)
14from ..utils import (
efe470e2 15 clean_html,
30a074c2 16 dict_get,
79e93125 17 ExtractorError,
841b6838 18 float_or_none,
7a6e8a1b 19 int_or_none,
e704f87f 20 parse_duration,
355d074f 21 parse_iso8601,
4dfbf869 22 parse_qs,
a0455d0f 23 qualities,
639f80c1 24 str_or_none,
25 traverse_obj,
49fa7de3
S
26 try_get,
27 unified_timestamp,
264e77c4 28 update_url_query,
3052a30d 29 url_or_none,
c64c03be 30 urljoin,
79e93125
PH
31)
32
33
class TwitchBaseIE(InfoExtractor):
    """Common base for the Twitch extractors.

    Holds the shared endpoints and client id, the login flow, and the
    helpers used to talk to the Twitch GraphQL endpoint.
    """

    _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'https://usher.ttvnw.net'
    _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
    _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
    _NETRC_MACHINE = 'twitch'

    # Maps a GraphQL operation name to the sha256 hash sent as its
    # persisted-query identifier (see _download_gql).
    _OPERATION_HASHES = {
        'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
        'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
        'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
        'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
        'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
        'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
        'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
        'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
    }

    def _perform_login(self, username, password):
        """Log in with *username* / *password*, handling an optional 2FA step.

        Raises ExtractorError (expected) when Twitch reports a login error
        or when the login page contains a blacklist message.
        """
        def fail(message):
            raise ExtractorError(
                'Unable to login. Twitch said: %s' % message, expected=True)

        def login_step(page, urlh, note, data):
            # Submit the page's hidden form inputs merged with *data* and
            # return the (page, handle) pair of the redirect target, or
            # (None, None) once Twitch confirms authentication.
            form = self._hidden_inputs(page)
            form.update(data)

            page_url = urlh.geturl()
            post_url = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
                'post url', default=self._LOGIN_POST_URL, group='url')
            post_url = urljoin(page_url, post_url)

            headers = {
                'Referer': page_url,
                'Origin': 'https://www.twitch.tv',
                'Content-Type': 'text/plain;charset=UTF-8',
            }

            # HTTP 400 is accepted so that the JSON error body can be
            # inspected and reported below.
            response = self._download_json(
                post_url, None, note, data=json.dumps(form).encode(),
                headers=headers, expected_status=400)
            error = dict_get(response, ('error', 'error_description', 'error_code'))
            if error:
                fail(error)

            if 'Authenticated successfully' in response.get('message', ''):
                return None, None

            redirect_url = urljoin(
                post_url,
                response.get('redirect') or response['redirect_path'])
            return self._download_webpage_handle(
                redirect_url, None, 'Downloading login redirect page',
                headers=headers)

        login_page, handle = self._download_webpage_handle(
            self._LOGIN_FORM_URL, None, 'Downloading login page')

        # Some TOR nodes and public proxies are blocked completely
        if 'blacklist_message' in login_page:
            fail(clean_html(login_page))

        redirect_page, handle = login_step(
            login_page, handle, 'Logging in', {
                'username': username,
                'password': password,
                'client_id': self._CLIENT_ID,
            })

        # Successful login
        if not redirect_page:
            return

        if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
            # TODO: Add mechanism to request an SMS or phone call
            tfa_token = self._get_tfa_info('two-factor authentication token')
            login_step(redirect_page, handle, 'Submitting TFA token', {
                'authy_token': tfa_token,
                'remember_2fa': 'true',
            })

    def _prefer_source(self, formats):
        """Mark the source-quality format(s) as preferred, then sort *formats*.

        A format whose ``format_id`` is ``'Source'`` gets quality 10. When
        there is none, every format whose URL contains ``/chunked/`` is
        given quality 10 and the note ``'Source'`` instead.
        """
        try:
            source = next(f for f in formats if f['format_id'] == 'Source')
            source['quality'] = 10
        except StopIteration:
            for f in formats:
                if '/chunked/' in f['url']:
                    f.update({
                        'quality': 10,
                        'format_note': 'Source',
                    })
        self._sort_formats(formats)

    def _download_base_gql(self, video_id, ops, note, fatal=True):
        """POST *ops* as JSON to the Twitch GraphQL endpoint and return the reply.

        The ``auth-token`` cookie for gql.twitch.tv, when present, is sent as
        an OAuth ``Authorization`` header. *fatal* is forwarded to
        ``_download_json``.
        """
        headers = {
            'Content-Type': 'text/plain;charset=UTF-8',
            'Client-ID': self._CLIENT_ID,
        }
        gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
        if gql_auth:
            headers['Authorization'] = 'OAuth ' + gql_auth.value
        return self._download_json(
            'https://gql.twitch.tv/gql', video_id, note,
            data=json.dumps(ops).encode(),
            headers=headers, fatal=fatal)

    def _download_gql(self, video_id, ops, note, fatal=True):
        """Run persisted GraphQL operations and return the response.

        Each dict in *ops* is modified in place: it receives an
        ``extensions.persistedQuery`` entry whose hash is looked up in
        ``_OPERATION_HASHES`` by the op's ``operationName`` (KeyError when the
        name is unknown). *fatal* is forwarded to the request, so callers that
        pass ``fatal=False`` get the non-fatal behaviour they asked for.
        """
        for op in ops:
            op['extensions'] = {
                'persistedQuery': {
                    'version': 1,
                    'sha256Hash': self._OPERATION_HASHES[op['operationName']],
                }
            }
        # Previously `fatal` was accepted but silently dropped here.
        return self._download_base_gql(video_id, ops, note, fatal=fatal)

    def _download_access_token(self, video_id, token_kind, param_name):
        """Fetch a playback access token via an ad-hoc GraphQL query.

        *token_kind* selects the ``<token_kind>PlaybackAccessToken`` field and
        *param_name* is the argument name that carries *video_id*. Returns the
        field's object, which the query requests with ``value`` and
        ``signature``.
        """
        method = '%sPlaybackAccessToken' % token_kind
        ops = {
            'query': '''{
              %s(
                %s: "%s",
                params: {
                  platform: "web",
                  playerBackend: "mediaplayer",
                  playerType: "site"
                }
              )
              {
                value
                signature
              }
            }''' % (method, param_name, video_id),
        }
        return self._download_base_gql(
            video_id, ops,
            'Downloading %s access token GraphQL' % token_kind)['data'][method]
04d02a9d 178
04d02a9d 179
class TwitchVodIE(TwitchBaseIE):
    """Extractor for a single Twitch VOD identified by its numeric id."""

    IE_NAME = 'twitch:vod'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                            player\.twitch\.tv/\?.*?\bvideo=v?
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
        'info_dict': {
            'id': 'v6528877',
            'ext': 'mp4',
            'title': 'LCK Summer Split - Week 6 Day 1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 17208,
            'timestamp': 1435131734,
            'upload_date': '20150624',
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'view_count': int,
            'start_time': 310,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Untitled broadcast (title is None)
        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
        'info_dict': {
            'id': 'v11230755',
            'ext': 'mp4',
            'title': 'Untitled Broadcast',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1638,
            'timestamp': 1439746708,
            'upload_date': '20150816',
            'uploader': 'BelkAO_o',
            'uploader_id': 'belkao_o',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'HTTP Error 404: Not Found',
    }, {
        'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/6528877',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/northernlion/video/291940395',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?video=480452374',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/635475444',
        'info_dict': {
            'id': 'v635475444',
            'ext': 'mp4',
            'title': 'Riot Games',
            'duration': 11643,
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'timestamp': 1590770569,
            'upload_date': '20200529',
            'chapters': [
                {
                    'start_time': 0,
                    'end_time': 573,
                    'title': 'League of Legends'
                },
                {
                    'start_time': 573,
                    'end_time': 3922,
                    'title': 'Legends of Runeterra'
                },
                {
                    'start_time': 3922,
                    'end_time': 11643,
                    'title': 'Art'
                }
            ],
        },
        'params': {
            'skip_download': True
        }
    }]

    def _download_info(self, item_id):
        """Fetch VOD metadata and chapter moments and return the info dict.

        Raises ExtractorError (expected) when the metadata response carries
        no video object for *item_id*.
        """
        data = self._download_gql(
            item_id, [{
                'operationName': 'VideoMetadata',
                'variables': {
                    'channelLogin': '',
                    'videoID': item_id,
                },
            }, {
                'operationName': 'VideoPlayer_ChapterSelectButtonVideo',
                'variables': {
                    'includePrivate': False,
                    'videoID': item_id,
                },
            }],
            'Downloading stream metadata GraphQL')

        video = traverse_obj(data, (0, 'data', 'video'))
        # The existence check must come before any item assignment on
        # `video`; otherwise a missing VOD fails with a TypeError instead of
        # the intended error message.
        if video is None:
            raise ExtractorError(
                'Video %s does not exist' % item_id, expected=True)

        video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node'))
        return self._extract_info_gql(video, item_id)

    def _extract_info(self, info):
        """Build an info dict from a metadata dict keyed by ``_id``, ``length``,
        ``recorded_at``, ``preview``, ``channel`` and similar fields.

        NOTE(review): no caller of this method is visible in this file.
        """
        status = info.get('status')
        if status == 'recording':
            is_live = True
        elif status == 'recorded':
            is_live = False
        else:
            is_live = None
        _QUALITIES = ('small', 'medium', 'large')
        quality_key = qualities(_QUALITIES)
        thumbnails = []
        preview = info.get('preview')
        if isinstance(preview, dict):
            for thumbnail_id, thumbnail_url in preview.items():
                thumbnail_url = url_or_none(thumbnail_url)
                if not thumbnail_url:
                    continue
                # Only the known size labels are kept.
                if thumbnail_id not in _QUALITIES:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'preference': quality_key(thumbnail_id),
                })
        return {
            'id': info['_id'],
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('length')),
            'thumbnails': thumbnails,
            'uploader': info.get('channel', {}).get('display_name'),
            'uploader_id': info.get('channel', {}).get('name'),
            'timestamp': parse_iso8601(info.get('recorded_at')),
            'view_count': int_or_none(info.get('views')),
            'is_live': is_live,
            'was_live': True,
        }

    def _extract_moments(self, info, item_id):
        """Yield one chapter dict per entry of ``info['moments']``.

        Position and duration are read in milliseconds and scaled by 1000.
        Entries lacking either value are skipped with a warning.
        """
        for moment in info.get('moments') or []:
            start_time = int_or_none(moment.get('positionMilliseconds'), 1000)
            duration = int_or_none(moment.get('durationMilliseconds'), 1000)
            name = str_or_none(moment.get('description'))

            if start_time is None or duration is None:
                self.report_warning(f'Important chapter information missing for chapter {name}', item_id)
                continue
            yield {
                'start_time': start_time,
                'end_time': start_time + duration,
                'title': name,
            }

    def _extract_info_gql(self, info, item_id):
        """Build the info dict from a GraphQL video object.

        ``is_live`` is derived from the preview thumbnail URL: True when it is
        the ``404_processing`` placeholder, False for any other thumbnail,
        and None when there is no thumbnail at all.
        """
        vod_id = info.get('id') or item_id
        # id backward compatibility for download archives
        if vod_id[0] != 'v':
            vod_id = 'v%s' % vod_id
        thumbnail = url_or_none(info.get('previewThumbnailURL'))
        is_live = None
        if thumbnail:
            if thumbnail.endswith('/404_processing_{width}x{height}.png'):
                is_live, thumbnail = True, None
            else:
                is_live = False
                # Fill the URL template's size placeholders with 0.
                for p in ('width', 'height'):
                    thumbnail = thumbnail.replace('{%s}' % p, '0')

        return {
            'id': vod_id,
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('lengthSeconds')),
            'thumbnail': thumbnail,
            'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
            'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
            'timestamp': unified_timestamp(info.get('publishedAt')),
            'view_count': int_or_none(info.get('viewCount')),
            'chapters': list(self._extract_moments(info, item_id)),
            'is_live': is_live,
            'was_live': True,
        }

    def _real_extract(self, url):
        """Extract metadata, HLS formats, start time and rechat subtitles."""
        vod_id = self._match_id(url)

        info = self._download_info(vod_id)
        access_token = self._download_access_token(vod_id, 'video', 'id')

        formats = self._extract_m3u8_formats(
            '%s/vod/%s.m3u8?%s' % (
                self._USHER_BASE, vod_id,
                compat_urllib_parse_urlencode({
                    'allow_source': 'true',
                    'allow_audio_only': 'true',
                    'allow_spectre': 'true',
                    'player': 'twitchweb',
                    'playlist_include_framerate': 'true',
                    'nauth': access_token['value'],
                    'nauthsig': access_token['signature'],
                })),
            vod_id, 'mp4', entry_protocol='m3u8_native')

        self._prefer_source(formats)
        info['formats'] = formats

        # A `t` query parameter (e.g. ?t=5m10s) sets the start offset.
        parsed_url = compat_urllib_parse_urlparse(url)
        query = compat_parse_qs(parsed_url.query)
        if 't' in query:
            info['start_time'] = parse_duration(query['t'][0])

        if info.get('timestamp') is not None:
            info['subtitles'] = {
                'rechat': [{
                    'url': update_url_query(
                        'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
                            'client_id': self._CLIENT_ID,
                        }),
                    'ext': 'json',
                }],
            }

        return info
426
427
841b6838
S
428def _make_video_result(node):
429 assert isinstance(node, dict)
430 video_id = node.get('id')
431 if not video_id:
432 return
433 return {
434 '_type': 'url_transparent',
435 'ie_key': TwitchVodIE.ie_key(),
c76eb41b 436 'id': 'v' + video_id,
841b6838
S
437 'url': 'https://www.twitch.tv/videos/%s' % video_id,
438 'title': node.get('title'),
439 'thumbnail': node.get('previewThumbnailURL'),
440 'duration': float_or_none(node.get('lengthSeconds')),
441 'view_count': int_or_none(node.get('viewCount')),
442 }
443
444
class TwitchCollectionIE(TwitchBaseIE):
    """Extractor that turns a Twitch collection into a playlist of VODs."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
        'info_dict': {
            'id': 'wlDCoH0zEBZZbQ',
            'title': 'Overthrow Nook, capitalism for children',
        },
        'playlist_mincount': 13,
    }]

    _OPERATION_NAME = 'CollectionSideBar'

    def _real_extract(self, url):
        """Download the collection and return its videos as a playlist."""
        collection_id = self._match_id(url)
        operations = [{
            'operationName': self._OPERATION_NAME,
            'variables': {'collectionID': collection_id},
        }]
        response = self._download_gql(
            collection_id, operations, 'Downloading collection GraphQL')
        collection = response[0]['data']['collection']
        title = collection.get('title')

        entries = []
        for edge in collection['items']['edges']:
            # Ignore anything that is not a dict edge holding a dict node.
            node = edge.get('node') if isinstance(edge, dict) else None
            if not isinstance(node, dict):
                continue
            video = _make_video_result(node)
            if video:
                entries.append(video)

        return self.playlist_result(
            entries, playlist_id=collection_id, playlist_title=title)
480
481
class TwitchPlaylistBaseIE(TwitchBaseIE):
    """Base for channel listings that are paged through GraphQL cursors.

    Subclasses supply ``_OPERATION_NAME``, ``_ENTRY_KIND``, ``_EDGE_KIND``,
    ``_NODE_KIND``, ``_make_variables`` and ``_extract_entry``.
    """

    _PAGE_LIMIT = 100

    def _entries(self, channel_name, *args):
        """Yield playlist entries page by page until the listing is exhausted.

        Paging stops when a page cannot be downloaded, has no edges, or
        leaves no string cursor to continue from.
        """
        cursor = None
        base_variables = self._make_variables(channel_name, *args)
        entries_key = '%ss' % self._ENTRY_KIND
        for page_num in itertools.count(1):
            variables = {**base_variables, 'limit': self._PAGE_LIMIT}
            if cursor:
                variables['cursor'] = cursor
            operations = [{
                'operationName': self._OPERATION_NAME,
                'variables': variables,
            }]
            page = self._download_gql(
                channel_name, operations,
                'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
                fatal=False)
            if not page:
                return
            edges = try_get(
                page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
            if not edges:
                return
            for edge in edges:
                if not isinstance(edge, dict) or edge.get('__typename') != self._EDGE_KIND:
                    continue
                node = edge.get('node')
                if not isinstance(node, dict) or node.get('__typename') != self._NODE_KIND:
                    continue
                entry = self._extract_entry(node)
                if not entry:
                    continue
                # The cursor only advances on edges that produced an entry.
                cursor = edge.get('cursor')
                yield entry
            if not (cursor and isinstance(cursor, compat_str)):
                return
523
c5db6bb3 524
841b6838
S
class TwitchVideosIE(TwitchPlaylistBaseIE):
    """Playlist extractor for a channel's videos page, with filter and sort."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'

    _TESTS = [{
        # All Videos sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Date',
        },
        'playlist_mincount': 924,
    }, {
        # All Videos sorted by Popular
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Popular',
        },
        'playlist_mincount': 931,
    }, {
        # Past Broadcasts sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Broadcasts sorted by Date',
        },
        'playlist_mincount': 27,
    }, {
        # Highlights sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Highlights sorted by Date',
        },
        'playlist_mincount': 901,
    }, {
        # Uploads sorted by Date
        'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
        'info_dict': {
            'id': 'esl_csgo',
            'title': 'esl_csgo - Uploads sorted by Date',
        },
        'playlist_mincount': 5,
    }, {
        # Past Premieres sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Premieres sorted by Date',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos',
        'only_matching': True,
    }]

    # Pairs the GraphQL broadcast type with the label used in playlist titles.
    Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])

    _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
    # Keyed by the value of the `filter` URL query parameter.
    _BROADCASTS = {
        'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
        'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
        'uploads': Broadcast('UPLOAD', 'Uploads'),
        'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
        'all': _DEFAULT_BROADCAST,
    }

    _DEFAULT_SORTED_BY = 'Date'
    # Keyed by the value of the `sort` URL query parameter.
    _SORTED_BY = {
        'time': _DEFAULT_SORTED_BY,
        'views': 'Popular',
    }

    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @classmethod
    def suitable(cls, url):
        """Defer to the clips / collections extractors when they match *url*."""
        more_specific = (TwitchVideosClipsIE, TwitchVideosCollectionsIE)
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super().suitable(url)

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        """Build the GraphQL variables shared by every page request."""
        return {
            'channelOwnerLogin': channel_name,
            'broadcastType': broadcast_type,
            'videoSort': sort.upper(),
        }

    @staticmethod
    def _extract_entry(node):
        """Convert a video node into a playlist entry."""
        return _make_video_result(node)

    def _real_extract(self, url):
        """Return the channel's videos as a playlist titled after filter and sort."""
        channel_name = self._match_id(url)
        query = parse_qs(url)
        filter_key = query.get('filter', ['all'])[0]
        sort = query.get('sort', ['time'])[0]
        broadcast = self._BROADCASTS.get(filter_key, self._DEFAULT_BROADCAST)
        sorted_by = self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)
        title = '%s - %s sorted by %s' % (channel_name, broadcast.label, sorted_by)
        return self.playlist_result(
            self._entries(channel_name, broadcast.type, sort),
            playlist_id=channel_name,
            playlist_title=title)
641
642
class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
    """Playlist extractor for a channel's clips, selectable by time range."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'

    _TESTS = [{
        # Clips
        'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'vanillatv - Clips Top All',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
        'only_matching': True,
    }]

    # Pairs the GraphQL criteria filter with the label used in playlist titles.
    Clip = collections.namedtuple('Clip', ['filter', 'label'])

    _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
    # Keyed by the value of the `range` URL query parameter.
    _RANGE = {
        '24hr': Clip('LAST_DAY', 'Top 24H'),
        '7d': _DEFAULT_CLIP,
        '30d': Clip('LAST_MONTH', 'Top 30D'),
        'all': Clip('ALL_TIME', 'Top All'),
    }

    # NB: values other than 20 result in skipped videos
    _PAGE_LIMIT = 20

    _OPERATION_NAME = 'ClipsCards__User'
    _ENTRY_KIND = 'clip'
    _EDGE_KIND = 'ClipEdge'
    _NODE_KIND = 'Clip'

    @staticmethod
    def _make_variables(channel_name, filter):
        """Build the GraphQL variables shared by every page request."""
        criteria = {'filter': filter}
        return {
            'login': channel_name,
            'criteria': criteria,
        }

    @staticmethod
    def _extract_entry(node):
        """Convert a clip node into a url_transparent entry for TwitchClipsIE.

        Returns None when the node has no valid URL.
        """
        assert isinstance(node, dict)
        clip_url = url_or_none(node.get('url'))
        if clip_url:
            return {
                '_type': 'url_transparent',
                'ie_key': TwitchClipsIE.ie_key(),
                'id': node.get('id'),
                'url': clip_url,
                'title': node.get('title'),
                'thumbnail': node.get('thumbnailURL'),
                'duration': float_or_none(node.get('durationSeconds')),
                'timestamp': unified_timestamp(node.get('createdAt')),
                'view_count': int_or_none(node.get('viewCount')),
                'language': node.get('language'),
            }
        return None

    def _real_extract(self, url):
        """Return the channel's clips for the requested range as a playlist."""
        channel_name = self._match_id(url)
        range_key = parse_qs(url).get('range', ['7d'])[0]
        clip = self._RANGE.get(range_key, self._DEFAULT_CLIP)
        title = '%s - Clips %s' % (channel_name, clip.label)
        return self.playlist_result(
            self._entries(channel_name, clip.filter),
            playlist_id=channel_name,
            playlist_title=title)
714
93753aad 715
841b6838
S
class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
    """Playlist extractor for the collections listed on a channel's videos page."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'

    _TESTS = [{
        # Collections
        'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Collections',
        },
        'playlist_mincount': 3,
    }]

    _OPERATION_NAME = 'ChannelCollectionsContent'
    _ENTRY_KIND = 'collection'
    _EDGE_KIND = 'CollectionsItemEdge'
    _NODE_KIND = 'Collection'

    @staticmethod
    def _make_variables(channel_name):
        """Build the GraphQL variables shared by every page request."""
        return {'ownerLogin': channel_name}

    @staticmethod
    def _extract_entry(node):
        """Convert a collection node into a url_transparent entry for TwitchCollectionIE.

        Returns None when the node has no ``id``.
        """
        assert isinstance(node, dict)
        collection_id = node.get('id')
        if collection_id:
            return {
                '_type': 'url_transparent',
                'ie_key': TwitchCollectionIE.ie_key(),
                'id': collection_id,
                'url': 'https://www.twitch.tv/collections/%s' % collection_id,
                'title': node.get('title'),
                'thumbnail': node.get('thumbnailURL'),
                'duration': float_or_none(node.get('lengthSeconds')),
                'timestamp': unified_timestamp(node.get('updatedAt')),
                'view_count': int_or_none(node.get('viewCount')),
            }
        return None

    def _real_extract(self, url):
        """Return the channel's collections as a playlist."""
        channel_name = self._match_id(url)
        title = '%s - Collections' % channel_name
        return self.playlist_result(
            self._entries(channel_name), playlist_id=channel_name,
            playlist_title=title)
763
240b9b7a 764
class TwitchStreamIE(TwitchBaseIE):
    """Extractor for a channel's currently running stream."""

    IE_NAME = 'twitch:stream'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/|
                            player\.twitch\.tv/\?.*?\bchannel=
                        )
                        (?P<id>[^/#?]+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/shroomztv',
        'info_dict': {
            'id': '12772022048',
            'display_id': 'shroomztv',
            'ext': 'mp4',
            'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
            'is_live': True,
            'timestamp': 1421928037,
            'upload_date': '20150122',
            'uploader': 'ShroomzTV',
            'uploader_id': 'shroomztv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.twitch.tv/miracle_doto#profile-0',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?channel=lotsofs',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/food',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/food',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only URLs that none of the more specific Twitch extractors claim."""
        more_specific = (
            TwitchVodIE,
            TwitchCollectionIE,
            TwitchVideosIE,
            TwitchVideosClipsIE,
            TwitchVideosCollectionsIE,
            TwitchClipsIE,
        )
        if any(ie.suitable(url) for ie in more_specific):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Extract the stream's HLS formats and metadata.

        Raises ExtractorError (expected) when the channel does not exist or
        has no running stream.
        """
        channel_name = self._match_id(url).lower()

        operations = [{
            'operationName': 'StreamMetadata',
            'variables': {'channelLogin': channel_name},
        }, {
            'operationName': 'ComscoreStreamingQuery',
            'variables': {
                'channel': channel_name,
                'clipSlug': '',
                'isClip': False,
                'isLive': True,
                'isVodOrCollection': False,
                'vodID': '',
            },
        }, {
            'operationName': 'VideoPreviewOverlay',
            'variables': {'login': channel_name},
        }]
        gql = self._download_gql(
            channel_name, operations, 'Downloading stream GraphQL')

        user = gql[0]['data']['user']
        if not user:
            raise ExtractorError(
                '%s does not exist' % channel_name, expected=True)

        stream = user['stream']
        if not stream:
            raise ExtractorError('%s is offline' % channel_name, expected=True)

        access_token = self._download_access_token(
            channel_name, 'stream', 'channelName')
        token = access_token['value']

        stream_id = stream.get('id') or channel_name
        playlist_url = '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name)
        formats = self._extract_m3u8_formats(
            playlist_url, stream_id, 'mp4', query={
                'allow_source': 'true',
                'allow_audio_only': 'true',
                'allow_spectre': 'true',
                'p': random.randint(1000000, 10000000),
                'player': 'twitchweb',
                'playlist_include_framerate': 'true',
                'segment_preference': '4',
                'sig': access_token['signature'].encode('utf-8'),
                'token': token.encode('utf-8'),
            })
        self._prefer_source(formats)

        # Uploader and description come from the second operation's reply,
        # the thumbnail from the third.
        sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
        uploader = sq_user.get('displayName')

        stream_type = stream.get('type')
        title = uploader or channel_name
        if stream_type in ['rerun', 'live']:
            title += ' (%s)' % stream_type

        return {
            'id': stream_id,
            'display_id': channel_name,
            'title': title,
            'description': try_get(
                sq_user, lambda x: x['broadcastSettings']['title'], compat_str),
            'thumbnail': url_or_none(try_get(
                gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
                compat_str)),
            'uploader': uploader,
            'uploader_id': channel_name,
            'timestamp': unified_timestamp(stream.get('createdAt')),
            'view_count': stream.get('viewers'),
            'formats': formats,
            'is_live': stream_type == 'live',
        }
778f9694
S
906
907
class TwitchClipsIE(TwitchBaseIE):
    """Extractor for a single Twitch clip identified by its slug."""

    IE_NAME = 'twitch:clips'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
                            (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
                        )
                        (?P<id>[^/?#&]+)
                    '''

    _TESTS = [{
        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
            'id': '42850523',
            'display_id': 'FaintLightGullWholeWheat',
            'ext': 'mp4',
            'title': 'EA Play 2016 Live from the Novo Theatre',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1465767393,
            'upload_date': '20160612',
            'creator': 'EA',
            'uploader': 'stereotype_',
            'uploader_id': '43566419',
        },
    }, {
        # multiple formats
        'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
        'only_matching': True,
    }, {
        'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a clip's formats, thumbnails and metadata.

        Raises ExtractorError (expected) when the access-token query returns
        no clip object.
        """
        video_id = self._match_id(url)

        clip = self._download_gql(
            video_id, [{
                'operationName': 'VideoAccessToken_Clip',
                'variables': {
                    'slug': video_id,
                },
            }],
            'Downloading clip access token GraphQL')[0]['data']['clip']

        if not clip:
            raise ExtractorError(
                'This clip is no longer available', expected=True)

        # Appended to every format URL below.
        access_query = {
            'sig': clip['playbackAccessToken']['signature'],
            'token': clip['playbackAccessToken']['value'],
        }

        # The detailed query is non-fatal; on failure the clip object from
        # the access-token query is used instead.
        data = self._download_base_gql(
            video_id, {
                'query': '''{
  clip(slug: "%s") {
    broadcaster {
      displayName
    }
    createdAt
    curator {
      displayName
      id
    }
    durationSeconds
    id
    tiny: thumbnailURL(width: 86, height: 45)
    small: thumbnailURL(width: 260, height: 147)
    medium: thumbnailURL(width: 480, height: 272)
    title
    videoQualities {
      frameRate
      quality
      sourceURL
    }
    viewCount
  }
}''' % video_id}, 'Downloading clip GraphQL', fatal=False)

        if data:
            clip = try_get(data, lambda x: x['data']['clip'], dict) or clip

        formats = []
        # `or []` also covers a key that is present but null, which a
        # `.get(..., [])` default would not.
        for option in clip.get('videoQualities') or []:
            if not isinstance(option, dict):
                continue
            source = url_or_none(option.get('sourceURL'))
            if not source:
                continue
            formats.append({
                'url': update_url_query(source, access_query),
                'format_id': option.get('quality'),
                'height': int_or_none(option.get('quality')),
                'fps': int_or_none(option.get('frameRate')),
            })
        self._sort_formats(formats)

        thumbnails = []
        for thumbnail_id in ('tiny', 'small', 'medium'):
            thumbnail_url = clip.get(thumbnail_id)
            if not thumbnail_url:
                continue
            thumb = {
                'id': thumbnail_id,
                'url': thumbnail_url,
            }
            # Read the dimensions from a "-<width>x<height>." URL fragment.
            mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
            if mobj:
                thumb.update({
                    'height': int(mobj.group(2)),
                    'width': int(mobj.group(1)),
                })
            thumbnails.append(thumb)

        return {
            'id': clip.get('id') or video_id,
            'display_id': video_id,
            'title': clip.get('title') or video_id,
            'formats': formats,
            'duration': int_or_none(clip.get('durationSeconds')),
            'view_count': int_or_none(clip.get('viewCount')),
            'timestamp': unified_timestamp(clip.get('createdAt')),
            'thumbnails': thumbnails,
            'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
            'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
            'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
        }