import collections
import itertools
import json
import random
import re

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_str,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    clean_html,
    dict_get,
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_duration,
    parse_iso8601,
    parse_qs,
    qualities,
    str_or_none,
    traverse_obj,
    try_get,
    unified_timestamp,
    update_url_query,
    url_or_none,
    urljoin,
)


class TwitchBaseIE(InfoExtractor):
    _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'https://usher.ttvnw.net'
    _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
    _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
    _NETRC_MACHINE = 'twitch'

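    # sha256 hashes of the persisted GraphQL queries used by the Twitch web
    # client. _download_gql() attaches one to each operation through the
    # 'persistedQuery' extension, roughly (illustrative shape only):
    #   {'operationName': 'VideoMetadata',
    #    'variables': {...},
    #    'extensions': {'persistedQuery': {'version': 1, 'sha256Hash': <hash>}}}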
    _OPERATION_HASHES = {
        'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
        'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
        'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
        'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
        'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
        'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
        'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
        'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
    }

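    # Login flow: fetch the login form, POST the credentials as JSON to
    # passport.twitch.tv, and, if the redirect page contains a 2FA form,
    # repeat the step with the Authy token supplied by the user.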
    def _perform_login(self, username, password):
        def fail(message):
            raise ExtractorError(
                'Unable to login. Twitch said: %s' % message, expected=True)

        def login_step(page, urlh, note, data):
            form = self._hidden_inputs(page)
            form.update(data)

            page_url = urlh.geturl()
            post_url = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
                'post url', default=self._LOGIN_POST_URL, group='url')
            post_url = urljoin(page_url, post_url)

            headers = {
                'Referer': page_url,
                'Origin': 'https://www.twitch.tv',
                'Content-Type': 'text/plain;charset=UTF-8',
            }

            response = self._download_json(
                post_url, None, note, data=json.dumps(form).encode(),
                headers=headers, expected_status=400)
            error = dict_get(response, ('error', 'error_description', 'error_code'))
            if error:
                fail(error)

            if 'Authenticated successfully' in response.get('message', ''):
                return None, None

            redirect_url = urljoin(
                post_url,
                response.get('redirect') or response['redirect_path'])
            return self._download_webpage_handle(
                redirect_url, None, 'Downloading login redirect page',
                headers=headers)

        login_page, handle = self._download_webpage_handle(
            self._LOGIN_FORM_URL, None, 'Downloading login page')

        # Some TOR nodes and public proxies are blocked completely
        if 'blacklist_message' in login_page:
            fail(clean_html(login_page))

        redirect_page, handle = login_step(
            login_page, handle, 'Logging in', {
                'username': username,
                'password': password,
                'client_id': self._CLIENT_ID,
            })

        # Successful login
        if not redirect_page:
            return

        if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
            # TODO: Add mechanism to request an SMS or phone call
            tfa_token = self._get_tfa_info('two-factor authentication token')
            login_step(redirect_page, handle, 'Submitting TFA token', {
                'authy_token': tfa_token,
                'remember_2fa': 'true',
            })

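    # The 'Source' rendition (or, failing that, the '/chunked/' variant
    # playlist) is the original broadcast quality, so it gets top priority.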
    def _prefer_source(self, formats):
        try:
            source = next(f for f in formats if f['format_id'] == 'Source')
            source['quality'] = 10
        except StopIteration:
            for f in formats:
                if '/chunked/' in f['url']:
                    f.update({
                        'quality': 10,
                        'format_note': 'Source',
                    })
        self._sort_formats(formats)

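    # All metadata is fetched from the GraphQL endpoint at
    # https://gql.twitch.tv/gql, authenticated with the web player Client-ID
    # and, when available, the 'auth-token' cookie of a logged-in session.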
    def _download_base_gql(self, video_id, ops, note, fatal=True):
        headers = {
            'Content-Type': 'text/plain;charset=UTF-8',
            'Client-ID': self._CLIENT_ID,
        }
        gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
        if gql_auth:
            headers['Authorization'] = 'OAuth ' + gql_auth.value
        return self._download_json(
            'https://gql.twitch.tv/gql', video_id, note,
            data=json.dumps(ops).encode(),
            headers=headers, fatal=fatal)

    def _download_gql(self, video_id, ops, note, fatal=True):
        for op in ops:
            op['extensions'] = {
                'persistedQuery': {
                    'version': 1,
                    'sha256Hash': self._OPERATION_HASHES[op['operationName']],
                }
            }
        # pass fatal through so that non-fatal callers (e.g. paginated
        # playlist requests) are actually treated as non-fatal
        return self._download_base_gql(video_id, ops, note, fatal=fatal)

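    # Playback access tokens are requested with an inline (non-persisted)
    # GraphQL query; the returned value/signature pair signs the usher
    # playlist URLs (nauth/nauthsig for VODs, token/sig for live streams).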
    def _download_access_token(self, video_id, token_kind, param_name):
        method = '%sPlaybackAccessToken' % token_kind
        ops = {
            'query': '''{
              %s(
                %s: "%s",
                params: {
                  platform: "web",
                  playerBackend: "mediaplayer",
                  playerType: "site"
                }
              )
              {
                value
                signature
              }
            }''' % (method, param_name, video_id),
        }
        return self._download_base_gql(
            video_id, ops,
            'Downloading %s access token GraphQL' % token_kind)['data'][method]


class TwitchVodIE(TwitchBaseIE):
    IE_NAME = 'twitch:vod'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                            player\.twitch\.tv/\?.*?\bvideo=v?
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
        'info_dict': {
            'id': 'v6528877',
            'ext': 'mp4',
            'title': 'LCK Summer Split - Week 6 Day 1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 17208,
            'timestamp': 1435131734,
            'upload_date': '20150624',
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'view_count': int,
            'start_time': 310,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Untitled broadcast (title is None)
        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
        'info_dict': {
            'id': 'v11230755',
            'ext': 'mp4',
            'title': 'Untitled Broadcast',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1638,
            'timestamp': 1439746708,
            'upload_date': '20150816',
            'uploader': 'BelkAO_o',
            'uploader_id': 'belkao_o',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'HTTP Error 404: Not Found',
    }, {
        'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/6528877',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/northernlion/video/291940395',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?video=480452374',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/635475444',
        'info_dict': {
            'id': 'v635475444',
            'ext': 'mp4',
            'title': 'Riot Games',
            'duration': 11643,
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'timestamp': 1590770569,
            'upload_date': '20200529',
            'chapters': [
                {
                    'start_time': 0,
                    'end_time': 573,
                    'title': 'League of Legends'
                },
                {
                    'start_time': 573,
                    'end_time': 3922,
                    'title': 'Legends of Runeterra'
                },
                {
                    'start_time': 3922,
                    'end_time': 11643,
                    'title': 'Art'
                }
            ],
        },
        'params': {
            'skip_download': True
        }
    }]

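    # VideoMetadata and the chapter "moments" are fetched in a single batched
    # GraphQL request; both operations are keyed off the VOD id.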
    def _download_info(self, item_id):
        data = self._download_gql(
            item_id, [{
                'operationName': 'VideoMetadata',
                'variables': {
                    'channelLogin': '',
                    'videoID': item_id,
                },
            }, {
                'operationName': 'VideoPlayer_ChapterSelectButtonVideo',
                'variables': {
                    'includePrivate': False,
                    'videoID': item_id,
                },
            }],
            'Downloading stream metadata GraphQL')

        video = traverse_obj(data, (0, 'data', 'video'))
        # check for a missing video before touching it, otherwise the
        # assignment below raises TypeError instead of the intended error
        if video is None:
            raise ExtractorError(
                'Video %s does not exist' % item_id, expected=True)
        video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node'))

        return self._extract_info_gql(video, item_id)

    def _extract_info(self, info):
        status = info.get('status')
        if status == 'recording':
            is_live = True
        elif status == 'recorded':
            is_live = False
        else:
            is_live = None
        _QUALITIES = ('small', 'medium', 'large')
        quality_key = qualities(_QUALITIES)
        thumbnails = []
        preview = info.get('preview')
        if isinstance(preview, dict):
            for thumbnail_id, thumbnail_url in preview.items():
                thumbnail_url = url_or_none(thumbnail_url)
                if not thumbnail_url:
                    continue
                if thumbnail_id not in _QUALITIES:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'preference': quality_key(thumbnail_id),
                })
        return {
            'id': info['_id'],
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('length')),
            'thumbnails': thumbnails,
            'uploader': info.get('channel', {}).get('display_name'),
            'uploader_id': info.get('channel', {}).get('name'),
            'timestamp': parse_iso8601(info.get('recorded_at')),
            'view_count': int_or_none(info.get('views')),
            'is_live': is_live,
            'was_live': True,
        }

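    # 'moments' (the chapter markers of a VOD) carry millisecond offsets;
    # they are converted to second-based start/end chapter entries below.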
    def _extract_moments(self, info, item_id):
        for moment in info.get('moments') or []:
            start_time = int_or_none(moment.get('positionMilliseconds'), 1000)
            duration = int_or_none(moment.get('durationMilliseconds'), 1000)
            name = str_or_none(moment.get('description'))

            if start_time is None or duration is None:
                self.report_warning(f'Important chapter information missing for chapter {name}', item_id)
                continue
            yield {
                'start_time': start_time,
                'end_time': start_time + duration,
                'title': name,
            }

    def _extract_info_gql(self, info, item_id):
        vod_id = info.get('id') or item_id
        # id backward compatibility for download archives
        if vod_id[0] != 'v':
            vod_id = 'v%s' % vod_id
        thumbnail = url_or_none(info.get('previewThumbnailURL'))
        is_live = None
        if thumbnail:
            if thumbnail.endswith('/404_processing_{width}x{height}.png'):
                is_live, thumbnail = True, None
            else:
                is_live = False
                for p in ('width', 'height'):
                    thumbnail = thumbnail.replace('{%s}' % p, '0')

        return {
            'id': vod_id,
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('lengthSeconds')),
            'thumbnail': thumbnail,
            'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
            'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
            'timestamp': unified_timestamp(info.get('publishedAt')),
            'view_count': int_or_none(info.get('viewCount')),
            'chapters': list(self._extract_moments(info, item_id)),
            'is_live': is_live,
            'was_live': True,
        }

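    # The VOD playlist comes from the usher endpoint, signed with the access
    # token fetched above; an illustrative URL shape (not a literal example):
    #   https://usher.ttvnw.net/vod/<vod_id>.m3u8?allow_source=true&...&nauth=<value>&nauthsig=<signature>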
    def _real_extract(self, url):
        vod_id = self._match_id(url)

        info = self._download_info(vod_id)
        access_token = self._download_access_token(vod_id, 'video', 'id')

        formats = self._extract_m3u8_formats(
            '%s/vod/%s.m3u8?%s' % (
                self._USHER_BASE, vod_id,
                compat_urllib_parse_urlencode({
                    'allow_source': 'true',
                    'allow_audio_only': 'true',
                    'allow_spectre': 'true',
                    'player': 'twitchweb',
                    'playlist_include_framerate': 'true',
                    'nauth': access_token['value'],
                    'nauthsig': access_token['signature'],
                })),
            vod_id, 'mp4', entry_protocol='m3u8_native')

        self._prefer_source(formats)
        info['formats'] = formats

        parsed_url = compat_urllib_parse_urlparse(url)
        query = compat_parse_qs(parsed_url.query)
        if 't' in query:
            info['start_time'] = parse_duration(query['t'][0])

        if info.get('timestamp') is not None:
            info['subtitles'] = {
                'rechat': [{
                    'url': update_url_query(
                        'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
                            'client_id': self._CLIENT_ID,
                        }),
                    'ext': 'json',
                }],
            }

        return info


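# Shared helper: wraps a GraphQL video node into a url_transparent entry that
# is resolved by TwitchVodIE, so the playlist extractors stay metadata-only.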
def _make_video_result(node):
    assert isinstance(node, dict)
    video_id = node.get('id')
    if not video_id:
        return
    return {
        '_type': 'url_transparent',
        'ie_key': TwitchVodIE.ie_key(),
        'id': 'v' + video_id,
        'url': 'https://www.twitch.tv/videos/%s' % video_id,
        'title': node.get('title'),
        'thumbnail': node.get('previewThumbnailURL'),
        'duration': float_or_none(node.get('lengthSeconds')),
        'view_count': int_or_none(node.get('viewCount')),
    }


class TwitchCollectionIE(TwitchBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
        'info_dict': {
            'id': 'wlDCoH0zEBZZbQ',
            'title': 'Overthrow Nook, capitalism for children',
        },
        'playlist_mincount': 13,
    }]

    _OPERATION_NAME = 'CollectionSideBar'

    def _real_extract(self, url):
        collection_id = self._match_id(url)
        collection = self._download_gql(
            collection_id, [{
                'operationName': self._OPERATION_NAME,
                'variables': {'collectionID': collection_id},
            }],
            'Downloading collection GraphQL')[0]['data']['collection']
        title = collection.get('title')
        entries = []
        for edge in collection['items']['edges']:
            if not isinstance(edge, dict):
                continue
            node = edge.get('node')
            if not isinstance(node, dict):
                continue
            video = _make_video_result(node)
            if video:
                entries.append(video)
        return self.playlist_result(
            entries, playlist_id=collection_id, playlist_title=title)


class TwitchPlaylistBaseIE(TwitchBaseIE):
    _PAGE_LIMIT = 100

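    # Cursor-based pagination: each GraphQL page returns up to _PAGE_LIMIT
    # edges, the cursor of the last yielded edge feeds the next request, and
    # iteration stops once a page has no edges or no usable cursor.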
    def _entries(self, channel_name, *args):
        cursor = None
        variables_common = self._make_variables(channel_name, *args)
        entries_key = '%ss' % self._ENTRY_KIND
        for page_num in itertools.count(1):
            variables = variables_common.copy()
            variables['limit'] = self._PAGE_LIMIT
            if cursor:
                variables['cursor'] = cursor
            page = self._download_gql(
                channel_name, [{
                    'operationName': self._OPERATION_NAME,
                    'variables': variables,
                }],
                'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
                fatal=False)
            if not page:
                break
            edges = try_get(
                page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
            if not edges:
                break
            for edge in edges:
                if not isinstance(edge, dict):
                    continue
                if edge.get('__typename') != self._EDGE_KIND:
                    continue
                node = edge.get('node')
                if not isinstance(node, dict):
                    continue
                if node.get('__typename') != self._NODE_KIND:
                    continue
                entry = self._extract_entry(node)
                if entry:
                    cursor = edge.get('cursor')
                    yield entry
            if not cursor or not isinstance(cursor, compat_str):
                break


class TwitchVideosIE(TwitchPlaylistBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'

    _TESTS = [{
        # All Videos sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Date',
        },
        'playlist_mincount': 924,
    }, {
        # All Videos sorted by Popular
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Popular',
        },
        'playlist_mincount': 931,
    }, {
        # Past Broadcasts sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Broadcasts sorted by Date',
        },
        'playlist_mincount': 27,
    }, {
        # Highlights sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Highlights sorted by Date',
        },
        'playlist_mincount': 901,
    }, {
        # Uploads sorted by Date
        'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
        'info_dict': {
            'id': 'esl_csgo',
            'title': 'esl_csgo - Uploads sorted by Date',
        },
        'playlist_mincount': 5,
    }, {
        # Past Premieres sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Premieres sorted by Date',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos',
        'only_matching': True,
    }]

    Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])

    _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
    _BROADCASTS = {
        'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
        'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
        'uploads': Broadcast('UPLOAD', 'Uploads'),
        'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
        'all': _DEFAULT_BROADCAST,
    }

    _DEFAULT_SORTED_BY = 'Date'
    _SORTED_BY = {
        'time': _DEFAULT_SORTED_BY,
        'views': 'Popular',
    }

    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @classmethod
    def suitable(cls, url):
        return (False
                if any(ie.suitable(url) for ie in (
                    TwitchVideosClipsIE,
                    TwitchVideosCollectionsIE))
                else super(TwitchVideosIE, cls).suitable(url))

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        return {
            'channelOwnerLogin': channel_name,
            'broadcastType': broadcast_type,
            'videoSort': sort.upper(),
        }

    @staticmethod
    def _extract_entry(node):
        return _make_video_result(node)

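    # The URL query parameters map onto GraphQL variables: ?filter= selects a
    # broadcastType (defaulting to All Videos) and ?sort= selects videoSort
    # (time -> Date, views -> Popular).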
    def _real_extract(self, url):
        channel_name = self._match_id(url)
        qs = parse_qs(url)
        filter = qs.get('filter', ['all'])[0]
        sort = qs.get('sort', ['time'])[0]
        broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
        return self.playlist_result(
            self._entries(channel_name, broadcast.type, sort),
            playlist_id=channel_name,
            playlist_title='%s - %s sorted by %s'
            % (channel_name, broadcast.label,
               self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))


class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'

    _TESTS = [{
        # Clips
        'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'vanillatv - Clips Top All',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
        'only_matching': True,
    }]

    Clip = collections.namedtuple('Clip', ['filter', 'label'])

    _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
    _RANGE = {
        '24hr': Clip('LAST_DAY', 'Top 24H'),
        '7d': _DEFAULT_CLIP,
        '30d': Clip('LAST_MONTH', 'Top 30D'),
        'all': Clip('ALL_TIME', 'Top All'),
    }

    # NB: values other than 20 result in skipped videos
    _PAGE_LIMIT = 20

    _OPERATION_NAME = 'ClipsCards__User'
    _ENTRY_KIND = 'clip'
    _EDGE_KIND = 'ClipEdge'
    _NODE_KIND = 'Clip'

    @staticmethod
    def _make_variables(channel_name, filter):
        return {
            'login': channel_name,
            'criteria': {
                'filter': filter,
            },
        }

    @staticmethod
    def _extract_entry(node):
        assert isinstance(node, dict)
        clip_url = url_or_none(node.get('url'))
        if not clip_url:
            return
        return {
            '_type': 'url_transparent',
            'ie_key': TwitchClipsIE.ie_key(),
            'id': node.get('id'),
            'url': clip_url,
            'title': node.get('title'),
            'thumbnail': node.get('thumbnailURL'),
            'duration': float_or_none(node.get('durationSeconds')),
            'timestamp': unified_timestamp(node.get('createdAt')),
            'view_count': int_or_none(node.get('viewCount')),
            'language': node.get('language'),
        }

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        qs = parse_qs(url)
        range = qs.get('range', ['7d'])[0]
        clip = self._RANGE.get(range, self._DEFAULT_CLIP)
        return self.playlist_result(
            self._entries(channel_name, clip.filter),
            playlist_id=channel_name,
            playlist_title='%s - Clips %s' % (channel_name, clip.label))


class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'

    _TESTS = [{
        # Collections
        'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Collections',
        },
        'playlist_mincount': 3,
    }]

    _OPERATION_NAME = 'ChannelCollectionsContent'
    _ENTRY_KIND = 'collection'
    _EDGE_KIND = 'CollectionsItemEdge'
    _NODE_KIND = 'Collection'

    @staticmethod
    def _make_variables(channel_name):
        return {
            'ownerLogin': channel_name,
        }

    @staticmethod
    def _extract_entry(node):
        assert isinstance(node, dict)
        collection_id = node.get('id')
        if not collection_id:
            return
        return {
            '_type': 'url_transparent',
            'ie_key': TwitchCollectionIE.ie_key(),
            'id': collection_id,
            'url': 'https://www.twitch.tv/collections/%s' % collection_id,
            'title': node.get('title'),
            'thumbnail': node.get('thumbnailURL'),
            'duration': float_or_none(node.get('lengthSeconds')),
            'timestamp': unified_timestamp(node.get('updatedAt')),
            'view_count': int_or_none(node.get('viewCount')),
        }

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        return self.playlist_result(
            self._entries(channel_name), playlist_id=channel_name,
            playlist_title='%s - Collections' % channel_name)


class TwitchStreamIE(TwitchBaseIE):
    IE_NAME = 'twitch:stream'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/|
                            player\.twitch\.tv/\?.*?\bchannel=
                        )
                        (?P<id>[^/#?]+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/shroomztv',
        'info_dict': {
            'id': '12772022048',
            'display_id': 'shroomztv',
            'ext': 'mp4',
            'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
            'is_live': True,
            'timestamp': 1421928037,
            'upload_date': '20150122',
            'uploader': 'ShroomzTV',
            'uploader_id': 'shroomztv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.twitch.tv/miracle_doto#profile-0',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?channel=lotsofs',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/food',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/food',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        return (False
                if any(ie.suitable(url) for ie in (
                    TwitchVodIE,
                    TwitchCollectionIE,
                    TwitchVideosIE,
                    TwitchVideosClipsIE,
                    TwitchVideosCollectionsIE,
                    TwitchClipsIE))
                else super(TwitchStreamIE, cls).suitable(url))

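    # Three GraphQL operations are batched per request: StreamMetadata (the
    # stream object), ComscoreStreamingQuery (uploader and title) and
    # VideoPreviewOverlay (thumbnail). The live playlist is then fetched from
    # usher, roughly (illustrative shape only):
    #   https://usher.ttvnw.net/api/channel/hls/<channel>.m3u8?sig=<signature>&token=<value>&...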
    def _real_extract(self, url):
        channel_name = self._match_id(url).lower()

        gql = self._download_gql(
            channel_name, [{
                'operationName': 'StreamMetadata',
                'variables': {'channelLogin': channel_name},
            }, {
                'operationName': 'ComscoreStreamingQuery',
                'variables': {
                    'channel': channel_name,
                    'clipSlug': '',
                    'isClip': False,
                    'isLive': True,
                    'isVodOrCollection': False,
                    'vodID': '',
                },
            }, {
                'operationName': 'VideoPreviewOverlay',
                'variables': {'login': channel_name},
            }],
            'Downloading stream GraphQL')

        user = gql[0]['data']['user']

        if not user:
            raise ExtractorError(
                '%s does not exist' % channel_name, expected=True)

        stream = user['stream']

        if not stream:
            raise ExtractorError('%s is offline' % channel_name, expected=True)

        access_token = self._download_access_token(
            channel_name, 'stream', 'channelName')
        token = access_token['value']

        stream_id = stream.get('id') or channel_name
        query = {
            'allow_source': 'true',
            'allow_audio_only': 'true',
            'allow_spectre': 'true',
            'p': random.randint(1000000, 10000000),
            'player': 'twitchweb',
            'playlist_include_framerate': 'true',
            'segment_preference': '4',
            'sig': access_token['signature'].encode('utf-8'),
            'token': token.encode('utf-8'),
        }
        formats = self._extract_m3u8_formats(
            '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
            stream_id, 'mp4', query=query)
        self._prefer_source(formats)

        view_count = stream.get('viewers')
        timestamp = unified_timestamp(stream.get('createdAt'))

        sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
        uploader = sq_user.get('displayName')
        description = try_get(
            sq_user, lambda x: x['broadcastSettings']['title'], compat_str)

        thumbnail = url_or_none(try_get(
            gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
            compat_str))

        title = uploader or channel_name
        stream_type = stream.get('type')
        if stream_type in ['rerun', 'live']:
            title += ' (%s)' % stream_type

        return {
            'id': stream_id,
            'display_id': channel_name,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': channel_name,
            'timestamp': timestamp,
            'view_count': view_count,
            'formats': formats,
            'is_live': stream_type == 'live',
        }


class TwitchClipsIE(TwitchBaseIE):
    IE_NAME = 'twitch:clips'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
                            (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
                        )
                        (?P<id>[^/?#&]+)
                    '''

    _TESTS = [{
        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
            'id': '42850523',
            'display_id': 'FaintLightGullWholeWheat',
            'ext': 'mp4',
            'title': 'EA Play 2016 Live from the Novo Theatre',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1465767393,
            'upload_date': '20160612',
            'creator': 'EA',
            'uploader': 'stereotype_',
            'uploader_id': '43566419',
        },
    }, {
        # multiple formats
        'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
        'only_matching': True,
    }, {
        'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }]

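    # Clips use two requests: the persisted VideoAccessToken_Clip query for
    # the sig/token pair, then a raw GraphQL query for the clip metadata and
    # the list of videoQualities from which the formats are built.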
    def _real_extract(self, url):
        video_id = self._match_id(url)

        clip = self._download_gql(
            video_id, [{
                'operationName': 'VideoAccessToken_Clip',
                'variables': {
                    'slug': video_id,
                },
            }],
            'Downloading clip access token GraphQL')[0]['data']['clip']

        if not clip:
            raise ExtractorError(
                'This clip is no longer available', expected=True)

        access_query = {
            'sig': clip['playbackAccessToken']['signature'],
            'token': clip['playbackAccessToken']['value'],
        }

        data = self._download_base_gql(
            video_id, {
                'query': '''{
  clip(slug: "%s") {
    broadcaster {
      displayName
    }
    createdAt
    curator {
      displayName
      id
    }
    durationSeconds
    id
    tiny: thumbnailURL(width: 86, height: 45)
    small: thumbnailURL(width: 260, height: 147)
    medium: thumbnailURL(width: 480, height: 272)
    title
    videoQualities {
      frameRate
      quality
      sourceURL
    }
    viewCount
  }
}''' % video_id}, 'Downloading clip GraphQL', fatal=False)

        if data:
            clip = try_get(data, lambda x: x['data']['clip'], dict) or clip

        formats = []
        for option in clip.get('videoQualities', []):
            if not isinstance(option, dict):
                continue
            source = url_or_none(option.get('sourceURL'))
            if not source:
                continue
            formats.append({
                'url': update_url_query(source, access_query),
                'format_id': option.get('quality'),
                'height': int_or_none(option.get('quality')),
                'fps': int_or_none(option.get('frameRate')),
            })
        self._sort_formats(formats)

        thumbnails = []
        for thumbnail_id in ('tiny', 'small', 'medium'):
            thumbnail_url = clip.get(thumbnail_id)
            if not thumbnail_url:
                continue
            thumb = {
                'id': thumbnail_id,
                'url': thumbnail_url,
            }
            mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
            if mobj:
                thumb.update({
                    'height': int(mobj.group(2)),
                    'width': int(mobj.group(1)),
                })
            thumbnails.append(thumb)

        return {
            'id': clip.get('id') or video_id,
            'display_id': video_id,
            'title': clip.get('title') or video_id,
            'formats': formats,
            'duration': int_or_none(clip.get('durationSeconds')),
            'view_count': int_or_none(clip.get('viewCount')),
            'timestamp': unified_timestamp(clip.get('createdAt')),
            'thumbnails': thumbnails,
            'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
            'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
            'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
        }