]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitch.py
4a17d80489a38a5495cb31b15c028dd46d1e42cc
[yt-dlp.git] / yt_dlp / extractor / twitch.py
1 import collections
2 import itertools
3 import json
4 import random
5 import re
6
7 from .common import InfoExtractor
8 from ..compat import (
9 compat_parse_qs,
10 compat_str,
11 compat_urllib_parse_urlencode,
12 compat_urllib_parse_urlparse,
13 )
14 from ..utils import (
15 ExtractorError,
16 UserNotLive,
17 base_url,
18 clean_html,
19 dict_get,
20 float_or_none,
21 int_or_none,
22 make_archive_id,
23 parse_duration,
24 parse_iso8601,
25 parse_qs,
26 qualities,
27 str_or_none,
28 traverse_obj,
29 try_get,
30 unified_timestamp,
31 update_url_query,
32 url_or_none,
33 urljoin,
34 )
35
36
class TwitchBaseIE(InfoExtractor):
    """Common base of the Twitch extractors: login, GraphQL helpers and thumbnail handling."""

    _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'https://usher.ttvnw.net'
    _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
    _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
    _NETRC_MACHINE = 'twitch'

    # Persisted-query hashes keyed by GraphQL operation name.
    # _download_gql() attaches the matching hash to every operation it sends
    _OPERATION_HASHES = {
        'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
        'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
        'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
        'ChannelCollectionsContent': '447aec6a0cc1e8d0a8d7732d47eb0762c336a2294fdb009e9c9d854e49d484b9',
        'StreamMetadata': 'a647c2a13599e5991e175155f798ca7f1ecddde73f7f341f39009c14dbf59962',
        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
        'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
        'VideoMetadata': '49b5b8f268cdeb259d75b58dcb0c1a748e3b575003448a2333dc5cdafd49adad',
        'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
        'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
    }

    def _perform_login(self, username, password):
        """Log in through the Twitch login form, submitting a 2FA token if the site asks for one.

        Raises ExtractorError (expected=True) when the login page contains a
        blacklist message or a login step's response carries an error field.
        """

        def fail(message):
            raise ExtractorError(
                'Unable to login. Twitch said: %s' % message, expected=True)

        def login_step(page, urlh, note, data):
            # Posts the hidden inputs of `page` merged with `data` as a JSON body.
            # Returns (None, None) once authenticated, otherwise the
            # (page, handle) pair of the redirect target named in the response
            form = self._hidden_inputs(page)
            form.update(data)

            page_url = urlh.geturl()
            post_url = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
                'post url', default=self._LOGIN_POST_URL, group='url')
            post_url = urljoin(page_url, post_url)

            headers = {
                'Referer': page_url,
                'Origin': 'https://www.twitch.tv',
                'Content-Type': 'text/plain;charset=UTF-8',
            }

            # HTTP 400 is accepted so that the error payload can be reported via fail()
            response = self._download_json(
                post_url, None, note, data=json.dumps(form).encode(),
                headers=headers, expected_status=400)
            error = dict_get(response, ('error', 'error_description', 'error_code'))
            if error:
                fail(error)

            if 'Authenticated successfully' in response.get('message', ''):
                return None, None

            redirect_url = urljoin(
                post_url,
                response.get('redirect') or response['redirect_path'])
            return self._download_webpage_handle(
                redirect_url, None, 'Downloading login redirect page',
                headers=headers)

        login_page, handle = self._download_webpage_handle(
            self._LOGIN_FORM_URL, None, 'Downloading login page')

        # Some TOR nodes and public proxies are blocked completely
        if 'blacklist_message' in login_page:
            fail(clean_html(login_page))

        redirect_page, handle = login_step(
            login_page, handle, 'Logging in', {
                'username': username,
                'password': password,
                'client_id': self._CLIENT_ID,
            })

        # Successful login
        if not redirect_page:
            return

        if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
            # TODO: Add mechanism to request an SMS or phone call
            tfa_token = self._get_tfa_info('two-factor authentication token')
            login_step(redirect_page, handle, 'Submitting TFA token', {
                'authy_token': tfa_token,
                'remember_2fa': 'true',
            })

    def _prefer_source(self, formats):
        """Give the source-quality format(s) in `formats` a quality of 10 (in place).

        The format whose format_id is 'Source' is used when there is one;
        otherwise every format whose URL contains '/chunked/' is marked and
        additionally gets the format_note 'Source'.
        """
        try:
            source = next(f for f in formats if f['format_id'] == 'Source')
            source['quality'] = 10
        except StopIteration:
            for f in formats:
                if '/chunked/' in f['url']:
                    f.update({
                        'quality': 10,
                        'format_note': 'Source',
                    })

    def _download_base_gql(self, video_id, ops, note, fatal=True):
        """POST `ops` JSON-encoded to the GraphQL endpoint and return the decoded response.

        An 'Authorization: OAuth ...' header is added when the cookie jar holds
        an 'auth-token' cookie for gql.twitch.tv. `fatal` is passed through to
        _download_json().
        """
        headers = {
            'Content-Type': 'text/plain;charset=UTF-8',
            'Client-ID': self._CLIENT_ID,
        }
        gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
        if gql_auth:
            headers['Authorization'] = 'OAuth ' + gql_auth.value
        return self._download_json(
            'https://gql.twitch.tv/gql', video_id, note,
            data=json.dumps(ops).encode(),
            headers=headers, fatal=fatal)

    def _download_gql(self, video_id, ops, note, fatal=True):
        """Send a list of persisted-query operations to the GraphQL endpoint.

        Every operation dict in `ops` is modified in place: its 'extensions'
        entry is set to the hash registered for its 'operationName' in
        _OPERATION_HASHES (KeyError for an unregistered name).
        `fatal` is forwarded unchanged to _download_base_gql().
        """
        for op in ops:
            op['extensions'] = {
                'persistedQuery': {
                    'version': 1,
                    'sha256Hash': self._OPERATION_HASHES[op['operationName']],
                }
            }
        # `fatal` used to be dropped here, which made fatal=False callers fatal anyway
        return self._download_base_gql(video_id, ops, note, fatal=fatal)

    def _download_access_token(self, video_id, token_kind, param_name):
        """Request the '<token_kind>PlaybackAccessToken' for `video_id`.

        `param_name` is the name of the GraphQL argument that carries
        `video_id`. Returns the token object, from which callers read the
        'value' and 'signature' fields selected by the query.
        """
        method = '%sPlaybackAccessToken' % token_kind
        ops = {
            'query': '''{
              %s(
                %s: "%s",
                params: {
                  platform: "web",
                  playerBackend: "mediaplayer",
                  playerType: "site"
                }
              )
              {
                value
                signature
              }
            }''' % (method, param_name, video_id),
        }
        return self._download_base_gql(
            video_id, ops,
            'Downloading %s access token GraphQL' % token_kind)['data'][method]

    def _get_thumbnails(self, thumbnail):
        """Return thumbnail entries for `thumbnail`, or None when it is empty.

        Two entries are produced: a preferred one in which the trailing
        '<W>x<H>' size of the file name is replaced by '0x0', and the URL
        exactly as given.
        """
        return [{
            'url': re.sub(r'\d+x\d+(\.\w+)($|(?=[?#]))', r'0x0\g<1>', thumbnail),
            'preference': 1,
        }, {
            'url': thumbnail,
        }] if thumbnail else None
189
190
class TwitchVodIE(TwitchBaseIE):
    """Extractor for single Twitch VODs (twitch.tv/videos/<id> and equivalent URL forms)."""

    IE_NAME = 'twitch:vod'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                            player\.twitch\.tv/\?.*?\bvideo=v?|
                            www\.twitch\.tv/[^/]+/schedule\?vodID=
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
        'info_dict': {
            'id': 'v6528877',
            'ext': 'mp4',
            'title': 'LCK Summer Split - Week 6 Day 1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 17208,
            'timestamp': 1435131734,
            'upload_date': '20150624',
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'view_count': int,
            'start_time': 310,
            'chapters': [
                {
                    'start_time': 0,
                    'end_time': 17208,
                    'title': 'League of Legends'
                }
            ],
            'live_status': 'was_live',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Untitled broadcast (title is None)
        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
        'info_dict': {
            'id': 'v11230755',
            'ext': 'mp4',
            'title': 'Untitled Broadcast',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1638,
            'timestamp': 1439746708,
            'upload_date': '20150816',
            'uploader': 'BelkAO_o',
            'uploader_id': 'belkao_o',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'HTTP Error 404: Not Found',
    }, {
        'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/6528877',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/northernlion/video/291940395',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?video=480452374',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/635475444',
        'info_dict': {
            'id': 'v635475444',
            'ext': 'mp4',
            'title': 'Riot Games',
            'duration': 11643,
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'timestamp': 1590770569,
            'upload_date': '20200529',
            'chapters': [
                {
                    'start_time': 0,
                    'end_time': 573,
                    'title': 'League of Legends'
                },
                {
                    'start_time': 573,
                    'end_time': 3922,
                    'title': 'Legends of Runeterra'
                },
                {
                    'start_time': 3922,
                    'end_time': 11643,
                    'title': 'Art'
                }
            ],
            'live_status': 'was_live',
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
        },
        'params': {
            'skip_download': True
        },
    }, {
        'note': 'Storyboards',
        'url': 'https://www.twitch.tv/videos/635475444',
        'info_dict': {
            'id': 'v635475444',
            'format_id': 'sb0',
            'ext': 'mhtml',
            'title': 'Riot Games',
            'duration': 11643,
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'timestamp': 1590770569,
            'upload_date': '20200529',
            'chapters': [
                {
                    'start_time': 0,
                    'end_time': 573,
                    'title': 'League of Legends'
                },
                {
                    'start_time': 573,
                    'end_time': 3922,
                    'title': 'Legends of Runeterra'
                },
                {
                    'start_time': 3922,
                    'end_time': 11643,
                    'title': 'Art'
                }
            ],
            'live_status': 'was_live',
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
            'columns': int,
            'rows': int,
        },
        'params': {
            'format': 'mhtml',
            'skip_download': True
        }
    }, {
        'note': 'VOD with single chapter',
        'url': 'https://www.twitch.tv/videos/1536751224',
        'info_dict': {
            'id': 'v1536751224',
            'ext': 'mp4',
            'title': 'Porter Robinson Star Guardian Stream Tour with LilyPichu',
            'duration': 8353,
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'timestamp': 1658267731,
            'upload_date': '20220719',
            'chapters': [
                {
                    'start_time': 0,
                    'end_time': 8353,
                    'title': 'League of Legends'
                }
            ],
            'live_status': 'was_live',
            'thumbnail': r're:^https?://.*\.jpg$',
            'view_count': int,
        },
        'params': {
            'skip_download': True
        },
        'expected_warnings': ['Unable to download JSON metadata: HTTP Error 403: Forbidden']
    }, {
        'url': 'https://www.twitch.tv/tangotek/schedule?vodID=1822395420',
        'only_matching': True,
    }]

    def _download_info(self, item_id):
        """Fetch metadata, chapter moments and the storyboard URL of a VOD in one GraphQL call.

        Returns the first 'video' object found in the responses, extended with
        a 'moments' list (moment nodes) and a 'storyboard' URL (or None).
        Raises ExtractorError (expected) when no response carries a video.
        """
        data = self._download_gql(
            item_id, [{
                'operationName': 'VideoMetadata',
                'variables': {
                    'channelLogin': '',
                    'videoID': item_id,
                },
            }, {
                'operationName': 'VideoPlayer_ChapterSelectButtonVideo',
                'variables': {
                    'includePrivate': False,
                    'videoID': item_id,
                },
            }, {
                'operationName': 'VideoPlayer_VODSeekbarPreviewVideo',
                'variables': {
                    'includePrivate': False,
                    'videoID': item_id,
                },
            }],
            'Downloading stream metadata GraphQL')

        video = traverse_obj(data, (..., 'data', 'video'), get_all=False)
        if video is None:
            raise ExtractorError(f'Video {item_id} does not exist', expected=True)

        video['moments'] = traverse_obj(data, (..., 'data', 'video', 'moments', 'edges', ..., 'node'))
        video['storyboard'] = traverse_obj(
            data, (..., 'data', 'video', 'seekPreviewsURL', {url_or_none}), get_all=False)

        return video

    def _extract_info(self, info):
        """Build an info dict from a video object keyed by '_id', 'length', 'recorded_at', etc.

        NOTE(review): nothing in the visible part of this file calls this
        method; it reads a different response shape than _extract_info_gql()
        - confirm whether it is still needed.
        """
        status = info.get('status')
        if status == 'recording':
            is_live = True
        elif status == 'recorded':
            is_live = False
        else:
            is_live = None
        _QUALITIES = ('small', 'medium', 'large')
        quality_key = qualities(_QUALITIES)
        thumbnails = []
        preview = info.get('preview')
        if isinstance(preview, dict):
            for thumbnail_id, thumbnail_url in preview.items():
                thumbnail_url = url_or_none(thumbnail_url)
                if not thumbnail_url:
                    continue
                if thumbnail_id not in _QUALITIES:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'preference': quality_key(thumbnail_id),
                })
        return {
            'id': info['_id'],
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('length')),
            'thumbnails': thumbnails,
            # traverse_obj tolerates a missing or null 'channel', which
            # info.get('channel', {}).get(...) did not
            'uploader': traverse_obj(info, ('channel', 'display_name')),
            'uploader_id': traverse_obj(info, ('channel', 'name')),
            'timestamp': parse_iso8601(info.get('recorded_at')),
            'view_count': int_or_none(info.get('views')),
            'is_live': is_live,
            'was_live': True,
        }

    def _extract_chapters(self, info, item_id):
        """Yield chapter dicts for a VOD.

        Without moments, a single title-only chapter named after the game is
        yielded (nothing at all when the game name is unavailable). Otherwise
        one chapter per moment is yielded; moments lacking a position or a
        duration are skipped with a warning.
        """
        if not info.get('moments'):
            game = traverse_obj(info, ('game', 'displayName'))
            if game:
                yield {'title': game}
            return

        for moment in info['moments']:
            # both values are reported in milliseconds; scale to seconds
            start_time = int_or_none(moment.get('positionMilliseconds'), 1000)
            duration = int_or_none(moment.get('durationMilliseconds'), 1000)
            name = str_or_none(moment.get('description'))

            if start_time is None or duration is None:
                self.report_warning(f'Important chapter information missing for chapter {name}', item_id)
                continue
            yield {
                'start_time': start_time,
                'end_time': start_time + duration,
                'title': name,
            }

    def _extract_info_gql(self, info, item_id):
        """Build the info dict (without formats) from a GraphQL video object."""
        vod_id = info.get('id') or item_id
        # id backward compatibility for download archives
        if vod_id[0] != 'v':
            vod_id = 'v%s' % vod_id
        thumbnail = url_or_none(info.get('previewThumbnailURL'))
        is_live = None
        if thumbnail:
            # a '404_processing_*.png' placeholder thumbnail is taken to mean
            # the VOD is still live; the placeholder itself is discarded
            if re.findall(r'/404_processing_[^.?#]+\.png', thumbnail):
                is_live, thumbnail = True, None
            else:
                is_live = False

        return {
            'id': vod_id,
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('lengthSeconds')),
            'thumbnails': self._get_thumbnails(thumbnail),
            'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
            'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
            'timestamp': unified_timestamp(info.get('publishedAt')),
            'view_count': int_or_none(info.get('viewCount')),
            'chapters': list(self._extract_chapters(info, item_id)),
            'is_live': is_live,
            'was_live': True,
        }

    def _extract_storyboard(self, item_id, storyboard_json_url, duration):
        """Yield one 'mhtml' storyboard format per usable entry of the storyboard JSON.

        Yields nothing when the duration or URL is missing, when the
        (non-fatal) metadata download brings back nothing usable, or for
        entries without images or count.
        """
        if not duration or not storyboard_json_url:
            return
        spec = self._download_json(
            storyboard_json_url, item_id, 'Downloading storyboard metadata JSON', fatal=False)
        if not isinstance(spec, list):
            # The download is best-effort: a failed request or an unexpected
            # payload shape must not abort the whole extraction
            return
        spec = [s for s in spec if isinstance(s, dict)]
        # sort from highest quality to lowest
        # This makes sb0 the highest-quality format, sb1 - lower, etc which is consistent with youtube sb ordering
        spec.sort(key=lambda x: int_or_none(x.get('width')) or 0, reverse=True)
        base = base_url(storyboard_json_url)
        for i, s in enumerate(spec):
            count = int_or_none(s.get('count'))
            images = s.get('images')
            if not (images and count):
                continue
            # the video duration is spread evenly over the storyboard images
            fragment_duration = duration / len(images)
            yield {
                'format_id': f'sb{i}',
                'format_note': 'storyboard',
                'ext': 'mhtml',
                'protocol': 'mhtml',
                'acodec': 'none',
                'vcodec': 'none',
                'url': urljoin(base, images[0]),
                'width': int_or_none(s.get('width')),
                'height': int_or_none(s.get('height')),
                'fps': count / duration,
                'rows': int_or_none(s.get('rows')),
                'columns': int_or_none(s.get('cols')),
                'fragments': [{
                    'url': urljoin(base, path),
                    'duration': fragment_duration,
                } for path in images],
            }

    def _real_extract(self, url):
        vod_id = self._match_id(url)

        video = self._download_info(vod_id)
        info = self._extract_info_gql(video, vod_id)
        access_token = self._download_access_token(vod_id, 'video', 'id')

        formats = self._extract_m3u8_formats(
            '%s/vod/%s.m3u8?%s' % (
                self._USHER_BASE, vod_id,
                compat_urllib_parse_urlencode({
                    'allow_source': 'true',
                    'allow_audio_only': 'true',
                    'allow_spectre': 'true',
                    'player': 'twitchweb',
                    'playlist_include_framerate': 'true',
                    'nauth': access_token['value'],
                    'nauthsig': access_token['signature'],
                })),
            vod_id, 'mp4', entry_protocol='m3u8_native')

        formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))

        self._prefer_source(formats)
        info['formats'] = formats

        # honour a '?t=<duration>' start offset given in the URL
        parsed_url = compat_urllib_parse_urlparse(url)
        query = compat_parse_qs(parsed_url.query)
        if 't' in query:
            info['start_time'] = parse_duration(query['t'][0])

        if info.get('timestamp') is not None:
            # chat replay is exposed as a JSON "subtitle" track named 'rechat'
            info['subtitles'] = {
                'rechat': [{
                    'url': update_url_query(
                        'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
                            'client_id': self._CLIENT_ID,
                        }),
                    'ext': 'json',
                }],
            }

        return info
567
568
569 def _make_video_result(node):
570 assert isinstance(node, dict)
571 video_id = node.get('id')
572 if not video_id:
573 return
574 return {
575 '_type': 'url_transparent',
576 'ie_key': TwitchVodIE.ie_key(),
577 'id': 'v' + video_id,
578 'url': 'https://www.twitch.tv/videos/%s' % video_id,
579 'title': node.get('title'),
580 'thumbnail': node.get('previewThumbnailURL'),
581 'duration': float_or_none(node.get('lengthSeconds')),
582 'view_count': int_or_none(node.get('viewCount')),
583 }
584
585
class TwitchCollectionIE(TwitchBaseIE):
    """Extractor for twitch.tv/collections/<id> playlists."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
        'info_dict': {
            'id': 'wlDCoH0zEBZZbQ',
            'title': 'Overthrow Nook, capitalism for children',
        },
        'playlist_mincount': 13,
    }]

    _OPERATION_NAME = 'CollectionSideBar'

    def _real_extract(self, url):
        collection_id = self._match_id(url)
        collection = self._download_gql(
            collection_id, [{
                'operationName': self._OPERATION_NAME,
                'variables': {'collectionID': collection_id},
            }],
            'Downloading collection GraphQL')[0]['data']['collection']
        if not collection:
            # a null collection is reported as an expected error instead of
            # failing with AttributeError further down
            raise ExtractorError(
                f'Collection {collection_id} does not exist', expected=True)

        title = collection.get('title')
        entries = []
        # traverse_obj tolerates missing/null 'items' or 'edges' and skips
        # edges that carry no node
        for node in traverse_obj(collection, ('items', 'edges', ..., 'node')):
            if not isinstance(node, dict):
                continue
            video = _make_video_result(node)
            if video:
                entries.append(video)
        return self.playlist_result(
            entries, playlist_id=collection_id, playlist_title=title)
621
622
class TwitchPlaylistBaseIE(TwitchBaseIE):
    """Base for the paginated per-channel listings.

    _entries() relies on the subclass providing _OPERATION_NAME, _ENTRY_KIND,
    _EDGE_KIND and _NODE_KIND as well as _make_variables() and _extract_entry().
    """

    _PAGE_LIMIT = 100

    def _entries(self, channel_name, *args):
        """Yield the entries of a channel listing, following cursors page by page.

        `args` are handed to _make_variables() together with `channel_name`.
        Pagination stops on a failed or empty page, or when the current page
        did not provide a string cursor to continue from.
        """
        cursor = None
        variables_common = self._make_variables(channel_name, *args)
        entries_key = '%ss' % self._ENTRY_KIND
        for page_num in itertools.count(1):
            variables = variables_common.copy()
            variables['limit'] = self._PAGE_LIMIT
            if cursor:
                variables['cursor'] = cursor
            page = self._download_gql(
                channel_name, [{
                    'operationName': self._OPERATION_NAME,
                    'variables': variables,
                }],
                'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
                fatal=False)
            if not page:
                break
            edges = try_get(
                page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
            if not edges:
                break
            # Forget the cursor that fetched this page: if no edge below yields
            # an entry, keeping it would request the very same page forever
            cursor = None
            for edge in edges:
                if not isinstance(edge, dict):
                    continue
                if edge.get('__typename') != self._EDGE_KIND:
                    continue
                node = edge.get('node')
                if not isinstance(node, dict):
                    continue
                if node.get('__typename') != self._NODE_KIND:
                    continue
                entry = self._extract_entry(node)
                if entry:
                    # continue after the last edge that produced an entry
                    cursor = edge.get('cursor')
                    yield entry
            if not cursor or not isinstance(cursor, compat_str):
                break
664
665
class TwitchVideosIE(TwitchPlaylistBaseIE):
    """Extractor for a channel's video listing (twitch.tv/<channel>/videos or /profile)."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'

    _TESTS = [{
        # All Videos sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Date',
        },
        'playlist_mincount': 924,
    }, {
        # All Videos sorted by Popular
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Popular',
        },
        'playlist_mincount': 931,
    }, {
        # Past Broadcasts sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Broadcasts sorted by Date',
        },
        'playlist_mincount': 27,
    }, {
        # Highlights sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Highlights sorted by Date',
        },
        'playlist_mincount': 901,
    }, {
        # Uploads sorted by Date
        'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
        'info_dict': {
            'id': 'esl_csgo',
            'title': 'esl_csgo - Uploads sorted by Date',
        },
        'playlist_mincount': 5,
    }, {
        # Past Premieres sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Premieres sorted by Date',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos',
        'only_matching': True,
    }]

    Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])

    # ?filter=<key> -> broadcast type sent to the API and label used in the playlist title
    _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
    _BROADCASTS = {
        'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
        'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
        'uploads': Broadcast('UPLOAD', 'Uploads'),
        'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
        'all': _DEFAULT_BROADCAST,
    }

    # ?sort=<key> -> label used in the playlist title
    _DEFAULT_SORTED_BY = 'Date'
    _SORTED_BY = {
        'time': _DEFAULT_SORTED_BY,
        'views': 'Popular',
    }

    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @classmethod
    def suitable(cls, url):
        # The clips and collections listings live under the same /videos path
        # and take precedence over this extractor
        for specialised_ie in (TwitchVideosClipsIE, TwitchVideosCollectionsIE):
            if specialised_ie.suitable(url):
                return False
        return super().suitable(url)

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        variables = {'channelOwnerLogin': channel_name}
        variables['broadcastType'] = broadcast_type
        variables['videoSort'] = sort.upper()
        return variables

    @staticmethod
    def _extract_entry(node):
        return _make_video_result(node)

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        query = parse_qs(url)
        filter_key = query.get('filter', ['all'])[0]
        sort_key = query.get('sort', ['time'])[0]
        # unknown filter/sort values fall back to the defaults for the title
        broadcast = self._BROADCASTS.get(filter_key, self._DEFAULT_BROADCAST)
        sorted_by = self._SORTED_BY.get(sort_key, self._DEFAULT_SORTED_BY)
        playlist_title = '%s - %s sorted by %s' % (
            channel_name, broadcast.label, sorted_by)
        return self.playlist_result(
            self._entries(channel_name, broadcast.type, sort_key),
            playlist_id=channel_name,
            playlist_title=playlist_title)
782
783
class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
    """Extractor for a channel's clips listing."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'

    _TESTS = [{
        # Clips
        'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'vanillatv - Clips Top All',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
        'only_matching': True,
    }]

    Clip = collections.namedtuple('Clip', ['filter', 'label'])

    # ?range=<key> -> filter value sent to the API and label used in the playlist title
    _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
    _RANGE = {
        '24hr': Clip('LAST_DAY', 'Top 24H'),
        '7d': _DEFAULT_CLIP,
        '30d': Clip('LAST_MONTH', 'Top 30D'),
        'all': Clip('ALL_TIME', 'Top All'),
    }

    # NB: values other than 20 result in skipped videos
    _PAGE_LIMIT = 20

    _OPERATION_NAME = 'ClipsCards__User'
    _ENTRY_KIND = 'clip'
    _EDGE_KIND = 'ClipEdge'
    _NODE_KIND = 'Clip'

    @staticmethod
    def _make_variables(channel_name, filter):
        criteria = {'filter': filter}
        return {
            'login': channel_name,
            'criteria': criteria,
        }

    @staticmethod
    def _extract_entry(node):
        """Turn a clip node into a url_transparent result for TwitchClipsIE (None without a valid URL)."""
        assert isinstance(node, dict)
        clip_url = url_or_none(node.get('url'))
        if clip_url:
            return {
                '_type': 'url_transparent',
                'ie_key': TwitchClipsIE.ie_key(),
                'id': node.get('id'),
                'url': clip_url,
                'title': node.get('title'),
                'thumbnail': node.get('thumbnailURL'),
                'duration': float_or_none(node.get('durationSeconds')),
                'timestamp': unified_timestamp(node.get('createdAt')),
                'view_count': int_or_none(node.get('viewCount')),
                'language': node.get('language'),
            }
        return None

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        range_key = parse_qs(url).get('range', ['7d'])[0]
        # an unknown range falls back to the 7-day default
        clip = self._RANGE.get(range_key, self._DEFAULT_CLIP)
        playlist_title = '%s - Clips %s' % (channel_name, clip.label)
        return self.playlist_result(
            self._entries(channel_name, clip.filter),
            playlist_id=channel_name,
            playlist_title=playlist_title)
855
856
class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
    """Extractor for a channel's collections listing (/videos?filter=collections)."""

    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'

    _TESTS = [{
        # Collections
        'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Collections',
        },
        'playlist_mincount': 3,
    }, {
        'url': 'https://www.twitch.tv/monstercat/videos?filter=collections',
        'info_dict': {
            'id': 'monstercat',
            'title': 'monstercat - Collections',
        },
        'playlist_mincount': 13,
    }]

    _OPERATION_NAME = 'ChannelCollectionsContent'
    _ENTRY_KIND = 'collection'
    _EDGE_KIND = 'CollectionsItemEdge'
    _NODE_KIND = 'Collection'

    @staticmethod
    def _make_variables(channel_name):
        return {'ownerLogin': channel_name}

    @staticmethod
    def _extract_entry(node):
        """Turn a collection node into a url_transparent result for TwitchCollectionIE (None without an id)."""
        assert isinstance(node, dict)
        collection_id = node.get('id')
        if collection_id:
            return {
                '_type': 'url_transparent',
                'ie_key': TwitchCollectionIE.ie_key(),
                'id': collection_id,
                'url': 'https://www.twitch.tv/collections/%s' % collection_id,
                'title': node.get('title'),
                'thumbnail': node.get('thumbnailURL'),
                'duration': float_or_none(node.get('lengthSeconds')),
                'timestamp': unified_timestamp(node.get('updatedAt')),
                'view_count': int_or_none(node.get('viewCount')),
            }
        return None

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        playlist_title = '%s - Collections' % channel_name
        return self.playlist_result(
            self._entries(channel_name),
            playlist_id=channel_name,
            playlist_title=playlist_title)
911
912
class TwitchStreamIE(TwitchBaseIE):
    """Extractor for a channel's live stream (twitch.tv/<channel>)."""

    IE_NAME = 'twitch:stream'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/|
                            player\.twitch\.tv/\?.*?\bchannel=
                        )
                        (?P<id>[^/#?]+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/shroomztv',
        'info_dict': {
            'id': '12772022048',
            'display_id': 'shroomztv',
            'ext': 'mp4',
            'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
            'is_live': True,
            'timestamp': 1421928037,
            'upload_date': '20150122',
            'uploader': 'ShroomzTV',
            'uploader_id': 'shroomztv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'User does not exist',
    }, {
        'url': 'http://www.twitch.tv/miracle_doto#profile-0',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?channel=lotsofs',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/food',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/food',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/monstercat',
        'info_dict': {
            'id': '40500071752',
            'display_id': 'monstercat',
            'title': 're:Monstercat',
            'description': 'md5:0945ad625e615bc8f0469396537d87d9',
            'is_live': True,
            'timestamp': 1677107190,
            'upload_date': '20230222',
            'uploader': 'Monstercat',
            'uploader_id': 'monstercat',
            'live_status': 'is_live',
            'thumbnail': 're:https://.*.jpg',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    @classmethod
    def suitable(cls, url):
        # This pattern matches any first path segment, so every more specific
        # Twitch extractor gets the first say
        more_specific = (
            TwitchVodIE,
            TwitchCollectionIE,
            TwitchVideosIE,
            TwitchVideosClipsIE,
            TwitchVideosCollectionsIE,
            TwitchClipsIE,
        )
        for ie in more_specific:
            if ie.suitable(url):
                return False
        return super().suitable(url)

    def _real_extract(self, url):
        channel_name = self._match_id(url).lower()

        operations = [{
            'operationName': 'StreamMetadata',
            'variables': {'channelLogin': channel_name},
        }, {
            'operationName': 'ComscoreStreamingQuery',
            'variables': {
                'channel': channel_name,
                'clipSlug': '',
                'isClip': False,
                'isLive': True,
                'isVodOrCollection': False,
                'vodID': '',
            },
        }, {
            'operationName': 'VideoPreviewOverlay',
            'variables': {'login': channel_name},
        }]
        # responses come back in the order of `operations`
        gql = self._download_gql(
            channel_name, operations, 'Downloading stream GraphQL')

        user = gql[0]['data']['user']
        if not user:
            raise ExtractorError(
                '%s does not exist' % channel_name, expected=True)

        stream = user['stream']
        if not stream:
            raise UserNotLive(video_id=channel_name)

        access_token = self._download_access_token(
            channel_name, 'stream', 'channelName')
        token_value = access_token['value']

        stream_id = stream.get('id') or channel_name
        playlist_url = '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name)
        playlist_query = {
            'allow_source': 'true',
            'allow_audio_only': 'true',
            'allow_spectre': 'true',
            'p': random.randint(1000000, 10000000),
            'player': 'twitchweb',
            'playlist_include_framerate': 'true',
            'segment_preference': '4',
            'sig': access_token['signature'].encode('utf-8'),
            'token': token_value.encode('utf-8'),
        }
        formats = self._extract_m3u8_formats(
            playlist_url, stream_id, 'mp4', query=playlist_query)
        self._prefer_source(formats)

        # display name and broadcast title come from the second response,
        # the preview image from the third
        sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
        uploader = sq_user.get('displayName')
        preview_image = try_get(
            gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
            compat_str)

        stream_type = stream.get('type')
        title = uploader or channel_name
        if stream_type in ('rerun', 'live'):
            title = title + ' (%s)' % stream_type

        return {
            'id': stream_id,
            'display_id': channel_name,
            'title': title,
            'description': try_get(
                sq_user, lambda x: x['broadcastSettings']['title'], compat_str),
            'thumbnails': self._get_thumbnails(url_or_none(preview_image)),
            'uploader': uploader,
            'uploader_id': channel_name,
            'timestamp': unified_timestamp(stream.get('createdAt')),
            'view_count': stream.get('viewers'),
            'formats': formats,
            'is_live': stream_type == 'live',
        }
1074
1075
class TwitchClipsIE(TwitchBaseIE):
    """Extractor for single Twitch clips."""

    IE_NAME = 'twitch:clips'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/)?clip/
                        )
                        (?P<id>[^/?#&]+)
                    '''

    _TESTS = [{
        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
            'id': '42850523',
            'display_id': 'FaintLightGullWholeWheat',
            'ext': 'mp4',
            'title': 'EA Play 2016 Live from the Novo Theatre',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1465767393,
            'upload_date': '20160612',
            'creator': 'EA',
            'uploader': 'stereotype_',
            'uploader_id': '43566419',
        },
    }, {
        # multiple formats
        'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
        'only_matching': True,
    }, {
        'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/clip/FaintLightGullWholeWheat',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        # First request: playback access token, also tells whether the clip exists
        clip = self._download_gql(
            video_id, [{
                'operationName': 'VideoAccessToken_Clip',
                'variables': {
                    'slug': video_id,
                },
            }],
            'Downloading clip access token GraphQL')[0]['data']['clip']

        if not clip:
            raise ExtractorError(
                'This clip is no longer available', expected=True)

        # appended to every source URL below
        access_query = {
            'sig': clip['playbackAccessToken']['signature'],
            'token': clip['playbackAccessToken']['value'],
        }

        # Second, non-fatal request: richer metadata. When it succeeds its clip
        # object replaces the one from the first request
        data = self._download_base_gql(
            video_id, {
                'query': '''{
  clip(slug: "%s") {
    broadcaster {
      displayName
    }
    createdAt
    curator {
      displayName
      id
    }
    durationSeconds
    id
    tiny: thumbnailURL(width: 86, height: 45)
    small: thumbnailURL(width: 260, height: 147)
    medium: thumbnailURL(width: 480, height: 272)
    title
    videoQualities {
      frameRate
      quality
      sourceURL
    }
    viewCount
  }
}''' % video_id}, 'Downloading clip GraphQL', fatal=False)

        if data:
            clip = try_get(data, lambda x: x['data']['clip'], dict) or clip

        formats = []
        # `or []` also covers an explicit null 'videoQualities'
        for option in clip.get('videoQualities') or []:
            if not isinstance(option, dict):
                continue
            source = url_or_none(option.get('sourceURL'))
            if not source:
                continue
            formats.append({
                'url': update_url_query(source, access_query),
                'format_id': option.get('quality'),
                'height': int_or_none(option.get('quality')),
                'fps': int_or_none(option.get('frameRate')),
            })

        thumbnails = []
        for thumbnail_id in ('tiny', 'small', 'medium'):
            thumbnail_url = clip.get(thumbnail_id)
            if not thumbnail_url:
                continue
            thumb = {
                'id': thumbnail_id,
                'url': thumbnail_url,
            }
            # the dimensions are taken from a '-<W>x<H>.' part of the URL
            mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
            if mobj:
                thumb.update({
                    'height': int(mobj.group(2)),
                    'width': int(mobj.group(1)),
                })
            thumbnails.append(thumb)

        # The old archive id is parsed out of the last format's URL; without
        # any format there is nothing to parse (formats[-1] would raise IndexError)
        old_id = None
        if formats:
            old_id = self._search_regex(
                r'%7C(\d+)(?:-\d+)?.mp4', formats[-1]['url'], 'old id', default=None)

        return {
            'id': clip.get('id') or video_id,
            '_old_archive_ids': [make_archive_id(self, old_id)] if old_id else None,
            'display_id': video_id,
            'title': clip.get('title'),
            'formats': formats,
            'duration': int_or_none(clip.get('durationSeconds')),
            'view_count': int_or_none(clip.get('viewCount')),
            'timestamp': unified_timestamp(clip.get('createdAt')),
            'thumbnails': thumbnails,
            'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
            'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
            'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
        }