yt_dlp/extractor/twitch.py

   1 import collections
   2 import itertools
   3 import json
   4 import random
   5 import re
   6
   7 from .common import InfoExtractor
   8 from ..compat import (
   9     compat_parse_qs,
  10     compat_str,
  11     compat_urllib_parse_urlencode,
  12     compat_urllib_parse_urlparse,
  13 )
  14 from ..utils import (
  15     base_url,
  16     clean_html,
  17     dict_get,
  18     ExtractorError,
  19     float_or_none,
  20     int_or_none,
  21     parse_duration,
  22     parse_iso8601,
  23     parse_qs,
  24     qualities,
  25     str_or_none,
  26     traverse_obj,
  27     try_get,
  28     unified_timestamp,
  29     update_url_query,
  30     url_or_none,
  31     urljoin,
  32 )
  33
  34
  35 class TwitchBaseIE(InfoExtractor):
  36     _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
  37
  38     _API_BASE = 'https://api.twitch.tv'
  39     _USHER_BASE = 'https://usher.ttvnw.net'
  40     _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
  41     _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
  42     _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
  43     _NETRC_MACHINE = 'twitch'
  44
  45     _OPERATION_HASHES = {
  46         'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
  47         'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
  48         'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
  49         'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
  50         'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
  51         'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
  52         'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
  53         'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
  54         'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
  55         'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
  56         'VideoPlayer_VODSeekbarPreviewVideo': '07e99e4d56c5a7c67117a154777b0baf85a5ffefa393b213f4bc712ccaf85dd6',
  57     }
  58
  59     def _perform_login(self, username, password):
  60         def fail(message):
  61             raise ExtractorError(
  62                 'Unable to login. Twitch said: %s' % message, expected=True)
  63
  64         def login_step(page, urlh, note, data):
  65             form = self._hidden_inputs(page)
  66             form.update(data)
  67
  68             page_url = urlh.geturl()
  69             post_url = self._search_regex(
  70                 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
  71                 'post url', default=self._LOGIN_POST_URL, group='url')
  72             post_url = urljoin(page_url, post_url)
  73
  74             headers = {
  75                 'Referer': page_url,
  76                 'Origin': 'https://www.twitch.tv',
  77                 'Content-Type': 'text/plain;charset=UTF-8',
  78             }
  79
  80             response = self._download_json(
  81                 post_url, None, note, data=json.dumps(form).encode(),
  82                 headers=headers, expected_status=400)
  83             error = dict_get(response, ('error', 'error_description', 'error_code'))
  84             if error:
  85                 fail(error)
  86
  87             if 'Authenticated successfully' in response.get('message', ''):
  88                 return None, None
  89
  90             redirect_url = urljoin(
  91                 post_url,
  92                 response.get('redirect') or response['redirect_path'])
  93             return self._download_webpage_handle(
  94                 redirect_url, None, 'Downloading login redirect page',
  95                 headers=headers)
  96
  97         login_page, handle = self._download_webpage_handle(
  98             self._LOGIN_FORM_URL, None, 'Downloading login page')
  99
 100         # Some TOR nodes and public proxies are blocked completely
 101         if 'blacklist_message' in login_page:
 102             fail(clean_html(login_page))
 103
 104         redirect_page, handle = login_step(
 105             login_page, handle, 'Logging in', {
 106                 'username': username,
 107                 'password': password,
 108                 'client_id': self._CLIENT_ID,
 109             })
 110
 111         # Successful login
 112         if not redirect_page:
 113             return
 114
 115         if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
 116             # TODO: Add mechanism to request an SMS or phone call
 117             tfa_token = self._get_tfa_info('two-factor authentication token')
 118             login_step(redirect_page, handle, 'Submitting TFA token', {
 119                 'authy_token': tfa_token,
 120                 'remember_2fa': 'true',
 121             })
 122
 123     def _prefer_source(self, formats):
 124         try:
 125             source = next(f for f in formats if f['format_id'] == 'Source')
 126             source['quality'] = 10
 127         except StopIteration:
 128             for f in formats:
 129                 if '/chunked/' in f['url']:
 130                     f.update({
 131                         'quality': 10,
 132                         'format_note': 'Source',
 133                     })
 134         self._sort_formats(formats)
 135
 136     def _download_base_gql(self, video_id, ops, note, fatal=True):
 137         headers = {
 138             'Content-Type': 'text/plain;charset=UTF-8',
 139             'Client-ID': self._CLIENT_ID,
 140         }
 141         gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
 142         if gql_auth:
 143             headers['Authorization'] = 'OAuth ' + gql_auth.value
 144         return self._download_json(
 145             'https://gql.twitch.tv/gql', video_id, note,
 146             data=json.dumps(ops).encode(),
 147             headers=headers, fatal=fatal)
 148
 149     def _download_gql(self, video_id, ops, note, fatal=True):
 150         for op in ops:
 151             op['extensions'] = {
 152                 'persistedQuery': {
 153                     'version': 1,
 154                     'sha256Hash': self._OPERATION_HASHES[op['operationName']],
 155                 }
 156             }
 157         return self._download_base_gql(video_id, ops, note)
 158
 159     def _download_access_token(self, video_id, token_kind, param_name):
 160         method = '%sPlaybackAccessToken' % token_kind
 161         ops = {
 162             'query': '''{
 163               %s(
 164                 %s: "%s",
 165                 params: {
 166                   platform: "web",
 167                   playerBackend: "mediaplayer",
 168                   playerType: "site"
 169                 }
 170               )
 171               {
 172                 value
 173                 signature
 174               }
 175             }''' % (method, param_name, video_id),
 176         }
 177         return self._download_base_gql(
 178             video_id, ops,
 179             'Downloading %s access token GraphQL' % token_kind)['data'][method]
 180
 181
 182 class TwitchVodIE(TwitchBaseIE):
 183     IE_NAME = 'twitch:vod'
 184     _VALID_URL = r'''(?x)
 185                     https?://
 186                         (?:
 187                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
 188                             player\.twitch\.tv/\?.*?\bvideo=v?
 189                         )
 190                         (?P<id>\d+)
 191                     '''
 192
 193     _TESTS = [{
 194         'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
 195         'info_dict': {
 196             'id': 'v6528877',
 197             'ext': 'mp4',
 198             'title': 'LCK Summer Split - Week 6 Day 1',
 199             'thumbnail': r're:^https?://.*\.jpg$',
 200             'duration': 17208,
 201             'timestamp': 1435131734,
 202             'upload_date': '20150624',
 203             'uploader': 'Riot Games',
 204             'uploader_id': 'riotgames',
 205             'view_count': int,
 206             'start_time': 310,
 207             'chapters': [],
 208             'live_status': 'was_live',
 209         },
 210         'params': {
 211             # m3u8 download
 212             'skip_download': True,
 213         },
 214     }, {
 215         # Untitled broadcast (title is None)
 216         'url': 'http://www.twitch.tv/belkao_o/v/11230755',
 217         'info_dict': {
 218             'id': 'v11230755',
 219             'ext': 'mp4',
 220             'title': 'Untitled Broadcast',
 221             'thumbnail': r're:^https?://.*\.jpg$',
 222             'duration': 1638,
 223             'timestamp': 1439746708,
 224             'upload_date': '20150816',
 225             'uploader': 'BelkAO_o',
 226             'uploader_id': 'belkao_o',
 227             'view_count': int,
 228         },
 229         'params': {
 230             # m3u8 download
 231             'skip_download': True,
 232         },
 233         'skip': 'HTTP Error 404: Not Found',
 234     }, {
 235         'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
 236         'only_matching': True,
 237     }, {
 238         'url': 'https://www.twitch.tv/videos/6528877',
 239         'only_matching': True,
 240     }, {
 241         'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
 242         'only_matching': True,
 243     }, {
 244         'url': 'https://www.twitch.tv/northernlion/video/291940395',
 245         'only_matching': True,
 246     }, {
 247         'url': 'https://player.twitch.tv/?video=480452374',
 248         'only_matching': True,
 249     }, {
 250         'url': 'https://www.twitch.tv/videos/635475444',
 251         'info_dict': {
 252             'id': 'v635475444',
 253             'ext': 'mp4',
 254             'title': 'Riot Games',
 255             'duration': 11643,
 256             'uploader': 'Riot Games',
 257             'uploader_id': 'riotgames',
 258             'timestamp': 1590770569,
 259             'upload_date': '20200529',
 260             'chapters': [
 261                 {
 262                     'start_time': 0,
 263                     'end_time': 573,
 264                     'title': 'League of Legends'
 265                 },
 266                 {
 267                     'start_time': 573,
 268                     'end_time': 3922,
 269                     'title': 'Legends of Runeterra'
 270                 },
 271                 {
 272                     'start_time': 3922,
 273                     'end_time': 11643,
 274                     'title': 'Art'
 275                 }
 276             ],
 277             'live_status': 'was_live',
 278             'thumbnail': r're:^https?://.*\.jpg$',
 279             'view_count': int,
 280         },
 281         'params': {
 282             'skip_download': True
 283         },
 284     }, {
 285         'note': 'Storyboards',
 286         'url': 'https://www.twitch.tv/videos/635475444',
 287         'info_dict': {
 288             'id': 'v635475444',
 289             'format_id': 'sb0',
 290             'ext': 'mhtml',
 291             'title': 'Riot Games',
 292             'duration': 11643,
 293             'uploader': 'Riot Games',
 294             'uploader_id': 'riotgames',
 295             'timestamp': 1590770569,
 296             'upload_date': '20200529',
 297             'chapters': [
 298                 {
 299                     'start_time': 0,
 300                     'end_time': 573,
 301                     'title': 'League of Legends'
 302                 },
 303                 {
 304                     'start_time': 573,
 305                     'end_time': 3922,
 306                     'title': 'Legends of Runeterra'
 307                 },
 308                 {
 309                     'start_time': 3922,
 310                     'end_time': 11643,
 311                     'title': 'Art'
 312                 }
 313             ],
 314             'live_status': 'was_live',
 315             'thumbnail': r're:^https?://.*\.jpg$',
 316             'view_count': int,
 317             'columns': int,
 318             'rows': int,
 319         },
 320         'params': {
 321             'format': 'mhtml',
 322             'skip_download': True
 323         }
 324     }]
 325
 326     def _download_info(self, item_id):
 327         data = self._download_gql(
 328             item_id, [{
 329                 'operationName': 'VideoMetadata',
 330                 'variables': {
 331                     'channelLogin': '',
 332                     'videoID': item_id,
 333                 },
 334             }, {
 335                 'operationName': 'VideoPlayer_ChapterSelectButtonVideo',
 336                 'variables': {
 337                     'includePrivate': False,
 338                     'videoID': item_id,
 339                 },
 340             }, {
 341                 'operationName': 'VideoPlayer_VODSeekbarPreviewVideo',
 342                 'variables': {
 343                     'includePrivate': False,
 344                     'videoID': item_id,
 345                 },
 346             }],
 347             'Downloading stream metadata GraphQL')
 348
 349         video = traverse_obj(data, (0, 'data', 'video'))
 350         video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node'))
 351         video['storyboard'] = traverse_obj(data, (2, 'data', 'video', 'seekPreviewsURL'), expected_type=url_or_none)
 352
 353         if video is None:
 354             raise ExtractorError(
 355                 'Video %s does not exist' % item_id, expected=True)
 356         return video
 357
 358     def _extract_info(self, info):
 359         status = info.get('status')
 360         if status == 'recording':
 361             is_live = True
 362         elif status == 'recorded':
 363             is_live = False
 364         else:
 365             is_live = None
 366         _QUALITIES = ('small', 'medium', 'large')
 367         quality_key = qualities(_QUALITIES)
 368         thumbnails = []
 369         preview = info.get('preview')
 370         if isinstance(preview, dict):
 371             for thumbnail_id, thumbnail_url in preview.items():
 372                 thumbnail_url = url_or_none(thumbnail_url)
 373                 if not thumbnail_url:
 374                     continue
 375                 if thumbnail_id not in _QUALITIES:
 376                     continue
 377                 thumbnails.append({
 378                     'url': thumbnail_url,
 379                     'preference': quality_key(thumbnail_id),
 380                 })
 381         return {
 382             'id': info['_id'],
 383             'title': info.get('title') or 'Untitled Broadcast',
 384             'description': info.get('description'),
 385             'duration': int_or_none(info.get('length')),
 386             'thumbnails': thumbnails,
 387             'uploader': info.get('channel', {}).get('display_name'),
 388             'uploader_id': info.get('channel', {}).get('name'),
 389             'timestamp': parse_iso8601(info.get('recorded_at')),
 390             'view_count': int_or_none(info.get('views')),
 391             'is_live': is_live,
 392             'was_live': True,
 393         }
 394
 395     def _extract_moments(self, info, item_id):
 396         for moment in info.get('moments') or []:
 397             start_time = int_or_none(moment.get('positionMilliseconds'), 1000)
 398             duration = int_or_none(moment.get('durationMilliseconds'), 1000)
 399             name = str_or_none(moment.get('description'))
 400
 401             if start_time is None or duration is None:
 402                 self.report_warning(f'Important chapter information missing for chapter {name}', item_id)
 403                 continue
 404             yield {
 405                 'start_time': start_time,
 406                 'end_time': start_time + duration,
 407                 'title': name,
 408             }
 409
 410     def _extract_info_gql(self, info, item_id):
 411         vod_id = info.get('id') or item_id
 412         # id backward compatibility for download archives
 413         if vod_id[0] != 'v':
 414             vod_id = 'v%s' % vod_id
 415         thumbnail = url_or_none(info.get('previewThumbnailURL'))
 416         is_live = None
 417         if thumbnail:
 418             if thumbnail.endswith('/404_processing_{width}x{height}.png'):
 419                 is_live, thumbnail = True, None
 420             else:
 421                 is_live = False
 422                 for p in ('width', 'height'):
 423                     thumbnail = thumbnail.replace('{%s}' % p, '0')
 424
 425         return {
 426             'id': vod_id,
 427             'title': info.get('title') or 'Untitled Broadcast',
 428             'description': info.get('description'),
 429             'duration': int_or_none(info.get('lengthSeconds')),
 430             'thumbnail': thumbnail,
 431             'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
 432             'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
 433             'timestamp': unified_timestamp(info.get('publishedAt')),
 434             'view_count': int_or_none(info.get('viewCount')),
 435             'chapters': list(self._extract_moments(info, item_id)),
 436             'is_live': is_live,
 437             'was_live': True,
 438         }
 439
 440     def _extract_storyboard(self, item_id, storyboard_json_url, duration):
 441         if not duration or not storyboard_json_url:
 442             return
 443         spec = self._download_json(storyboard_json_url, item_id, 'Downloading storyboard metadata JSON', fatal=False) or []
 444         # sort from highest quality to lowest
 445         # This makes sb0 the highest-quality format, sb1 - lower, etc which is consistent with youtube sb ordering
 446         spec.sort(key=lambda x: int_or_none(x.get('width')) or 0, reverse=True)
 447         base = base_url(storyboard_json_url)
 448         for i, s in enumerate(spec):
 449             count = int_or_none(s.get('count'))
 450             images = s.get('images')
 451             if not (images and count):
 452                 continue
 453             fragment_duration = duration / len(images)
 454             yield {
 455                 'format_id': f'sb{i}',
 456                 'format_note': 'storyboard',
 457                 'ext': 'mhtml',
 458                 'protocol': 'mhtml',
 459                 'acodec': 'none',
 460                 'vcodec': 'none',
 461                 'url': urljoin(base, images[0]),
 462                 'width': int_or_none(s.get('width')),
 463                 'height': int_or_none(s.get('height')),
 464                 'fps': count / duration,
 465                 'rows': int_or_none(s.get('rows')),
 466                 'columns': int_or_none(s.get('cols')),
 467                 'fragments': [{
 468                     'url': urljoin(base, path),
 469                     'duration': fragment_duration,
 470                 } for path in images],
 471             }
 472
 473     def _real_extract(self, url):
 474         vod_id = self._match_id(url)
 475
 476         video = self._download_info(vod_id)
 477         info = self._extract_info_gql(video, vod_id)
 478         access_token = self._download_access_token(vod_id, 'video', 'id')
 479
 480         formats = self._extract_m3u8_formats(
 481             '%s/vod/%s.m3u8?%s' % (
 482                 self._USHER_BASE, vod_id,
 483                 compat_urllib_parse_urlencode({
 484                     'allow_source': 'true',
 485                     'allow_audio_only': 'true',
 486                     'allow_spectre': 'true',
 487                     'player': 'twitchweb',
 488                     'playlist_include_framerate': 'true',
 489                     'nauth': access_token['value'],
 490                     'nauthsig': access_token['signature'],
 491                 })),
 492             vod_id, 'mp4', entry_protocol='m3u8_native')
 493
 494         formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration')))
 495
 496         self._prefer_source(formats)
 497         info['formats'] = formats
 498
 499         parsed_url = compat_urllib_parse_urlparse(url)
 500         query = compat_parse_qs(parsed_url.query)
 501         if 't' in query:
 502             info['start_time'] = parse_duration(query['t'][0])
 503
 504         if info.get('timestamp') is not None:
 505             info['subtitles'] = {
 506                 'rechat': [{
 507                     'url': update_url_query(
 508                         'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
 509                             'client_id': self._CLIENT_ID,
 510                         }),
 511                     'ext': 'json',
 512                 }],
 513             }
 514
 515         return info
 516
 517
 518 def _make_video_result(node):
 519     assert isinstance(node, dict)
 520     video_id = node.get('id')
 521     if not video_id:
 522         return
 523     return {
 524         '_type': 'url_transparent',
 525         'ie_key': TwitchVodIE.ie_key(),
 526         'id': 'v' + video_id,
 527         'url': 'https://www.twitch.tv/videos/%s' % video_id,
 528         'title': node.get('title'),
 529         'thumbnail': node.get('previewThumbnailURL'),
 530         'duration': float_or_none(node.get('lengthSeconds')),
 531         'view_count': int_or_none(node.get('viewCount')),
 532     }
 533
 534
 535 class TwitchCollectionIE(TwitchBaseIE):
 536     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
 537
 538     _TESTS = [{
 539         'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
 540         'info_dict': {
 541             'id': 'wlDCoH0zEBZZbQ',
 542             'title': 'Overthrow Nook, capitalism for children',
 543         },
 544         'playlist_mincount': 13,
 545     }]
 546
 547     _OPERATION_NAME = 'CollectionSideBar'
 548
 549     def _real_extract(self, url):
 550         collection_id = self._match_id(url)
 551         collection = self._download_gql(
 552             collection_id, [{
 553                 'operationName': self._OPERATION_NAME,
 554                 'variables': {'collectionID': collection_id},
 555             }],
 556             'Downloading collection GraphQL')[0]['data']['collection']
 557         title = collection.get('title')
 558         entries = []
 559         for edge in collection['items']['edges']:
 560             if not isinstance(edge, dict):
 561                 continue
 562             node = edge.get('node')
 563             if not isinstance(node, dict):
 564                 continue
 565             video = _make_video_result(node)
 566             if video:
 567                 entries.append(video)
 568         return self.playlist_result(
 569             entries, playlist_id=collection_id, playlist_title=title)
 570
 571
 572 class TwitchPlaylistBaseIE(TwitchBaseIE):
 573     _PAGE_LIMIT = 100
 574
 575     def _entries(self, channel_name, *args):
 576         cursor = None
 577         variables_common = self._make_variables(channel_name, *args)
 578         entries_key = '%ss' % self._ENTRY_KIND
 579         for page_num in itertools.count(1):
 580             variables = variables_common.copy()
 581             variables['limit'] = self._PAGE_LIMIT
 582             if cursor:
 583                 variables['cursor'] = cursor
 584             page = self._download_gql(
 585                 channel_name, [{
 586                     'operationName': self._OPERATION_NAME,
 587                     'variables': variables,
 588                 }],
 589                 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
 590                 fatal=False)
 591             if not page:
 592                 break
 593             edges = try_get(
 594                 page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
 595             if not edges:
 596                 break
 597             for edge in edges:
 598                 if not isinstance(edge, dict):
 599                     continue
 600                 if edge.get('__typename') != self._EDGE_KIND:
 601                     continue
 602                 node = edge.get('node')
 603                 if not isinstance(node, dict):
 604                     continue
 605                 if node.get('__typename') != self._NODE_KIND:
 606                     continue
 607                 entry = self._extract_entry(node)
 608                 if entry:
 609                     cursor = edge.get('cursor')
 610                     yield entry
 611             if not cursor or not isinstance(cursor, compat_str):
 612                 break
 613
 614
 615 class TwitchVideosIE(TwitchPlaylistBaseIE):
 616     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
 617
 618     _TESTS = [{
 619         # All Videos sorted by Date
 620         'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
 621         'info_dict': {
 622             'id': 'spamfish',
 623             'title': 'spamfish - All Videos sorted by Date',
 624         },
 625         'playlist_mincount': 924,
 626     }, {
 627         # All Videos sorted by Popular
 628         'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
 629         'info_dict': {
 630             'id': 'spamfish',
 631             'title': 'spamfish - All Videos sorted by Popular',
 632         },
 633         'playlist_mincount': 931,
 634     }, {
 635         # Past Broadcasts sorted by Date
 636         'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
 637         'info_dict': {
 638             'id': 'spamfish',
 639             'title': 'spamfish - Past Broadcasts sorted by Date',
 640         },
 641         'playlist_mincount': 27,
 642     }, {
 643         # Highlights sorted by Date
 644         'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
 645         'info_dict': {
 646             'id': 'spamfish',
 647             'title': 'spamfish - Highlights sorted by Date',
 648         },
 649         'playlist_mincount': 901,
 650     }, {
 651         # Uploads sorted by Date
 652         'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
 653         'info_dict': {
 654             'id': 'esl_csgo',
 655             'title': 'esl_csgo - Uploads sorted by Date',
 656         },
 657         'playlist_mincount': 5,
 658     }, {
 659         # Past Premieres sorted by Date
 660         'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
 661         'info_dict': {
 662             'id': 'spamfish',
 663             'title': 'spamfish - Past Premieres sorted by Date',
 664         },
 665         'playlist_mincount': 1,
 666     }, {
 667         'url': 'https://www.twitch.tv/spamfish/videos/all',
 668         'only_matching': True,
 669     }, {
 670         'url': 'https://m.twitch.tv/spamfish/videos/all',
 671         'only_matching': True,
 672     }, {
 673         'url': 'https://www.twitch.tv/spamfish/videos',
 674         'only_matching': True,
 675     }]
 676
 677     Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])
 678
 679     _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
 680     _BROADCASTS = {
 681         'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
 682         'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
 683         'uploads': Broadcast('UPLOAD', 'Uploads'),
 684         'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
 685         'all': _DEFAULT_BROADCAST,
 686     }
 687
 688     _DEFAULT_SORTED_BY = 'Date'
 689     _SORTED_BY = {
 690         'time': _DEFAULT_SORTED_BY,
 691         'views': 'Popular',
 692     }
 693
 694     _OPERATION_NAME = 'FilterableVideoTower_Videos'
 695     _ENTRY_KIND = 'video'
 696     _EDGE_KIND = 'VideoEdge'
 697     _NODE_KIND = 'Video'
 698
 699     @classmethod
 700     def suitable(cls, url):
 701         return (False
 702                 if any(ie.suitable(url) for ie in (
 703                     TwitchVideosClipsIE,
 704                     TwitchVideosCollectionsIE))
 705                 else super(TwitchVideosIE, cls).suitable(url))
 706
 707     @staticmethod
 708     def _make_variables(channel_name, broadcast_type, sort):
 709         return {
 710             'channelOwnerLogin': channel_name,
 711             'broadcastType': broadcast_type,
 712             'videoSort': sort.upper(),
 713         }
 714
 715     @staticmethod
 716     def _extract_entry(node):
 717         return _make_video_result(node)
 718
 719     def _real_extract(self, url):
 720         channel_name = self._match_id(url)
 721         qs = parse_qs(url)
 722         filter = qs.get('filter', ['all'])[0]
 723         sort = qs.get('sort', ['time'])[0]
 724         broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
 725         return self.playlist_result(
 726             self._entries(channel_name, broadcast.type, sort),
 727             playlist_id=channel_name,
 728             playlist_title='%s - %s sorted by %s'
 729             % (channel_name, broadcast.label,
 730                self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))
 731
 732
 733 class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
 734     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'
 735
 736     _TESTS = [{
 737         # Clips
 738         'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
 739         'info_dict': {
 740             'id': 'vanillatv',
 741             'title': 'vanillatv - Clips Top All',
 742         },
 743         'playlist_mincount': 1,
 744     }, {
 745         'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
 746         'only_matching': True,
 747     }]
 748
 749     Clip = collections.namedtuple('Clip', ['filter', 'label'])
 750
 751     _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
 752     _RANGE = {
 753         '24hr': Clip('LAST_DAY', 'Top 24H'),
 754         '7d': _DEFAULT_CLIP,
 755         '30d': Clip('LAST_MONTH', 'Top 30D'),
 756         'all': Clip('ALL_TIME', 'Top All'),
 757     }
 758
 759     # NB: values other than 20 result in skipped videos
 760     _PAGE_LIMIT = 20
 761
 762     _OPERATION_NAME = 'ClipsCards__User'
 763     _ENTRY_KIND = 'clip'
 764     _EDGE_KIND = 'ClipEdge'
 765     _NODE_KIND = 'Clip'
 766
 767     @staticmethod
 768     def _make_variables(channel_name, filter):
 769         return {
 770             'login': channel_name,
 771             'criteria': {
 772                 'filter': filter,
 773             },
 774         }
 775
 776     @staticmethod
 777     def _extract_entry(node):
 778         assert isinstance(node, dict)
 779         clip_url = url_or_none(node.get('url'))
 780         if not clip_url:
 781             return
 782         return {
 783             '_type': 'url_transparent',
 784             'ie_key': TwitchClipsIE.ie_key(),
 785             'id': node.get('id'),
 786             'url': clip_url,
 787             'title': node.get('title'),
 788             'thumbnail': node.get('thumbnailURL'),
 789             'duration': float_or_none(node.get('durationSeconds')),
 790             'timestamp': unified_timestamp(node.get('createdAt')),
 791             'view_count': int_or_none(node.get('viewCount')),
 792             'language': node.get('language'),
 793         }
 794
 795     def _real_extract(self, url):
 796         channel_name = self._match_id(url)
 797         qs = parse_qs(url)
 798         range = qs.get('range', ['7d'])[0]
 799         clip = self._RANGE.get(range, self._DEFAULT_CLIP)
 800         return self.playlist_result(
 801             self._entries(channel_name, clip.filter),
 802             playlist_id=channel_name,
 803             playlist_title='%s - Clips %s' % (channel_name, clip.label))
 804
 805
 806 class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
 807     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'
 808
 809     _TESTS = [{
 810         # Collections
 811         'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
 812         'info_dict': {
 813             'id': 'spamfish',
 814             'title': 'spamfish - Collections',
 815         },
 816         'playlist_mincount': 3,
 817     }]
 818
 819     _OPERATION_NAME = 'ChannelCollectionsContent'
 820     _ENTRY_KIND = 'collection'
 821     _EDGE_KIND = 'CollectionsItemEdge'
 822     _NODE_KIND = 'Collection'
 823
 824     @staticmethod
 825     def _make_variables(channel_name):
 826         return {
 827             'ownerLogin': channel_name,
 828         }
 829
 830     @staticmethod
 831     def _extract_entry(node):
 832         assert isinstance(node, dict)
 833         collection_id = node.get('id')
 834         if not collection_id:
 835             return
 836         return {
 837             '_type': 'url_transparent',
 838             'ie_key': TwitchCollectionIE.ie_key(),
 839             'id': collection_id,
 840             'url': 'https://www.twitch.tv/collections/%s' % collection_id,
 841             'title': node.get('title'),
 842             'thumbnail': node.get('thumbnailURL'),
 843             'duration': float_or_none(node.get('lengthSeconds')),
 844             'timestamp': unified_timestamp(node.get('updatedAt')),
 845             'view_count': int_or_none(node.get('viewCount')),
 846         }
 847
 848     def _real_extract(self, url):
 849         channel_name = self._match_id(url)
 850         return self.playlist_result(
 851             self._entries(channel_name), playlist_id=channel_name,
 852             playlist_title='%s - Collections' % channel_name)
 853
 854
 855 class TwitchStreamIE(TwitchBaseIE):
 856     IE_NAME = 'twitch:stream'
 857     _VALID_URL = r'''(?x)
 858                     https?://
 859                         (?:
 860                             (?:(?:www|go|m)\.)?twitch\.tv/|
 861                             player\.twitch\.tv/\?.*?\bchannel=
 862                         )
 863                         (?P<id>[^/#?]+)
 864                     '''
 865
 866     _TESTS = [{
 867         'url': 'http://www.twitch.tv/shroomztv',
 868         'info_dict': {
 869             'id': '12772022048',
 870             'display_id': 'shroomztv',
 871             'ext': 'mp4',
 872             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 873             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
 874             'is_live': True,
 875             'timestamp': 1421928037,
 876             'upload_date': '20150122',
 877             'uploader': 'ShroomzTV',
 878             'uploader_id': 'shroomztv',
 879             'view_count': int,
 880         },
 881         'params': {
 882             # m3u8 download
 883             'skip_download': True,
 884         },
 885     }, {
 886         'url': 'http://www.twitch.tv/miracle_doto#profile-0',
 887         'only_matching': True,
 888     }, {
 889         'url': 'https://player.twitch.tv/?channel=lotsofs',
 890         'only_matching': True,
 891     }, {
 892         'url': 'https://go.twitch.tv/food',
 893         'only_matching': True,
 894     }, {
 895         'url': 'https://m.twitch.tv/food',
 896         'only_matching': True,
 897     }]
 898
 899     @classmethod
 900     def suitable(cls, url):
 901         return (False
 902                 if any(ie.suitable(url) for ie in (
 903                     TwitchVodIE,
 904                     TwitchCollectionIE,
 905                     TwitchVideosIE,
 906                     TwitchVideosClipsIE,
 907                     TwitchVideosCollectionsIE,
 908                     TwitchClipsIE))
 909                 else super(TwitchStreamIE, cls).suitable(url))
 910
 911     def _real_extract(self, url):
 912         channel_name = self._match_id(url).lower()
 913
 914         gql = self._download_gql(
 915             channel_name, [{
 916                 'operationName': 'StreamMetadata',
 917                 'variables': {'channelLogin': channel_name},
 918             }, {
 919                 'operationName': 'ComscoreStreamingQuery',
 920                 'variables': {
 921                     'channel': channel_name,
 922                     'clipSlug': '',
 923                     'isClip': False,
 924                     'isLive': True,
 925                     'isVodOrCollection': False,
 926                     'vodID': '',
 927                 },
 928             }, {
 929                 'operationName': 'VideoPreviewOverlay',
 930                 'variables': {'login': channel_name},
 931             }],
 932             'Downloading stream GraphQL')
 933
 934         user = gql[0]['data']['user']
 935
 936         if not user:
 937             raise ExtractorError(
 938                 '%s does not exist' % channel_name, expected=True)
 939
 940         stream = user['stream']
 941
 942         if not stream:
 943             raise ExtractorError('%s is offline' % channel_name, expected=True)
 944
 945         access_token = self._download_access_token(
 946             channel_name, 'stream', 'channelName')
 947         token = access_token['value']
 948
 949         stream_id = stream.get('id') or channel_name
 950         query = {
 951             'allow_source': 'true',
 952             'allow_audio_only': 'true',
 953             'allow_spectre': 'true',
 954             'p': random.randint(1000000, 10000000),
 955             'player': 'twitchweb',
 956             'playlist_include_framerate': 'true',
 957             'segment_preference': '4',
 958             'sig': access_token['signature'].encode('utf-8'),
 959             'token': token.encode('utf-8'),
 960         }
 961         formats = self._extract_m3u8_formats(
 962             '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
 963             stream_id, 'mp4', query=query)
 964         self._prefer_source(formats)
 965
 966         view_count = stream.get('viewers')
 967         timestamp = unified_timestamp(stream.get('createdAt'))
 968
 969         sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
 970         uploader = sq_user.get('displayName')
 971         description = try_get(
 972             sq_user, lambda x: x['broadcastSettings']['title'], compat_str)
 973
 974         thumbnail = url_or_none(try_get(
 975             gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
 976             compat_str))
 977
 978         title = uploader or channel_name
 979         stream_type = stream.get('type')
 980         if stream_type in ['rerun', 'live']:
 981             title += ' (%s)' % stream_type
 982
 983         return {
 984             'id': stream_id,
 985             'display_id': channel_name,
 986             'title': title,
 987             'description': description,
 988             'thumbnail': thumbnail,
 989             'uploader': uploader,
 990             'uploader_id': channel_name,
 991             'timestamp': timestamp,
 992             'view_count': view_count,
 993             'formats': formats,
 994             'is_live': stream_type == 'live',
 995         }
 996
 997
 998 class TwitchClipsIE(TwitchBaseIE):
 999     IE_NAME = 'twitch:clips'
1000     _VALID_URL = r'''(?x)
1001                     https?://
1002                         (?:
1003                             clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
1004                             (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
1005                         )
1006                         (?P<id>[^/?#&]+)
1007                     '''
1008
1009     _TESTS = [{
1010         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
1011         'md5': '761769e1eafce0ffebfb4089cb3847cd',
1012         'info_dict': {
1013             'id': '42850523',
1014             'display_id': 'FaintLightGullWholeWheat',
1015             'ext': 'mp4',
1016             'title': 'EA Play 2016 Live from the Novo Theatre',
1017             'thumbnail': r're:^https?://.*\.jpg',
1018             'timestamp': 1465767393,
1019             'upload_date': '20160612',
1020             'creator': 'EA',
1021             'uploader': 'stereotype_',
1022             'uploader_id': '43566419',
1023         },
1024     }, {
1025         # multiple formats
1026         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
1027         'only_matching': True,
1028     }, {
1029         'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
1030         'only_matching': True,
1031     }, {
1032         'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
1033         'only_matching': True,
1034     }, {
1035         'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
1036         'only_matching': True,
1037     }, {
1038         'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
1039         'only_matching': True,
1040     }]
1041
1042     def _real_extract(self, url):
1043         video_id = self._match_id(url)
1044
1045         clip = self._download_gql(
1046             video_id, [{
1047                 'operationName': 'VideoAccessToken_Clip',
1048                 'variables': {
1049                     'slug': video_id,
1050                 },
1051             }],
1052             'Downloading clip access token GraphQL')[0]['data']['clip']
1053
1054         if not clip:
1055             raise ExtractorError(
1056                 'This clip is no longer available', expected=True)
1057
1058         access_query = {
1059             'sig': clip['playbackAccessToken']['signature'],
1060             'token': clip['playbackAccessToken']['value'],
1061         }
1062
1063         data = self._download_base_gql(
1064             video_id, {
1065                 'query': '''{
1066   clip(slug: "%s") {
1067     broadcaster {
1068       displayName
1069     }
1070     createdAt
1071     curator {
1072       displayName
1073       id
1074     }
1075     durationSeconds
1076     id
1077     tiny: thumbnailURL(width: 86, height: 45)
1078     small: thumbnailURL(width: 260, height: 147)
1079     medium: thumbnailURL(width: 480, height: 272)
1080     title
1081     videoQualities {
1082       frameRate
1083       quality
1084       sourceURL
1085     }
1086     viewCount
1087   }
1088 }''' % video_id}, 'Downloading clip GraphQL', fatal=False)
1089
1090         if data:
1091             clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
1092
1093         formats = []
1094         for option in clip.get('videoQualities', []):
1095             if not isinstance(option, dict):
1096                 continue
1097             source = url_or_none(option.get('sourceURL'))
1098             if not source:
1099                 continue
1100             formats.append({
1101                 'url': update_url_query(source, access_query),
1102                 'format_id': option.get('quality'),
1103                 'height': int_or_none(option.get('quality')),
1104                 'fps': int_or_none(option.get('frameRate')),
1105             })
1106         self._sort_formats(formats)
1107
1108         thumbnails = []
1109         for thumbnail_id in ('tiny', 'small', 'medium'):
1110             thumbnail_url = clip.get(thumbnail_id)
1111             if not thumbnail_url:
1112                 continue
1113             thumb = {
1114                 'id': thumbnail_id,
1115                 'url': thumbnail_url,
1116             }
1117             mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
1118             if mobj:
1119                 thumb.update({
1120                     'height': int(mobj.group(2)),
1121                     'width': int(mobj.group(1)),
1122                 })
1123             thumbnails.append(thumb)
1124
1125         return {
1126             'id': clip.get('id') or video_id,
1127             'display_id': video_id,
1128             'title': clip.get('title') or video_id,
1129             'formats': formats,
1130             'duration': int_or_none(clip.get('durationSeconds')),
1131             'view_count': int_or_none(clip.get('viewCount')),
1132             'timestamp': unified_timestamp(clip.get('createdAt')),
1133             'thumbnails': thumbnails,
1134             'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
1135             'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
1136             'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
1137         }