import collections
import itertools
import json
import random
import re

from .common import InfoExtractor
from ..compat import (
    compat_parse_qs,
    compat_str,
    compat_urllib_parse_urlencode,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    clean_html,
    dict_get,
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_duration,
    parse_iso8601,
    parse_qs,
    qualities,
    str_or_none,
    traverse_obj,
    try_get,
    unified_timestamp,
    update_url_query,
    url_or_none,
    urljoin,
)


class TwitchBaseIE(InfoExtractor):
    _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'

    _API_BASE = 'https://api.twitch.tv'
    _USHER_BASE = 'https://usher.ttvnw.net'
    _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
    _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
    _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
    _NETRC_MACHINE = 'twitch'

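    # sha256 hashes of the persisted GraphQL queries used by the Twitch web
    # client. _download_gql() attaches one to each operation through the
    # 'persistedQuery' extension, roughly (illustrative shape only):
    #   {'operationName': 'VideoMetadata',
    #    'variables': {...},
    #    'extensions': {'persistedQuery': {'version': 1, 'sha256Hash': <hash>}}}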
    _OPERATION_HASHES = {
        'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
        'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
        'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
        'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
        'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
        'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
        'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
        'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
        'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
        'VideoPlayer_ChapterSelectButtonVideo': '8d2793384aac3773beab5e59bd5d6f585aedb923d292800119e03d40cd0f9b41',
    }

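    # Login flow: fetch the login form, POST the credentials as JSON to
    # passport.twitch.tv, and, if the redirect page contains a 2FA form,
    # repeat the step with the Authy token supplied by the user.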
    def _perform_login(self, username, password):
        def fail(message):
            raise ExtractorError(
                'Unable to login. Twitch said: %s' % message, expected=True)

        def login_step(page, urlh, note, data):
            form = self._hidden_inputs(page)
            form.update(data)

            page_url = urlh.geturl()
            post_url = self._search_regex(
                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
                'post url', default=self._LOGIN_POST_URL, group='url')
            post_url = urljoin(page_url, post_url)

            headers = {
                'Referer': page_url,
                'Origin': 'https://www.twitch.tv',
                'Content-Type': 'text/plain;charset=UTF-8',
            }

            response = self._download_json(
                post_url, None, note, data=json.dumps(form).encode(),
                headers=headers, expected_status=400)
            error = dict_get(response, ('error', 'error_description', 'error_code'))
            if error:
                fail(error)

            if 'Authenticated successfully' in response.get('message', ''):
                return None, None

            redirect_url = urljoin(
                post_url,
                response.get('redirect') or response['redirect_path'])
            return self._download_webpage_handle(
                redirect_url, None, 'Downloading login redirect page',
                headers=headers)

        login_page, handle = self._download_webpage_handle(
            self._LOGIN_FORM_URL, None, 'Downloading login page')

        # Some TOR nodes and public proxies are blocked completely
        if 'blacklist_message' in login_page:
            fail(clean_html(login_page))

        redirect_page, handle = login_step(
            login_page, handle, 'Logging in', {
                'username': username,
                'password': password,
                'client_id': self._CLIENT_ID,
            })

        # Successful login
        if not redirect_page:
            return

        if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
            # TODO: Add mechanism to request an SMS or phone call
            tfa_token = self._get_tfa_info('two-factor authentication token')
            login_step(redirect_page, handle, 'Submitting TFA token', {
                'authy_token': tfa_token,
                'remember_2fa': 'true',
            })

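    # The 'Source' rendition (or, failing that, the '/chunked/' variant
    # playlist) is the original broadcast quality, so it gets top priority.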
    def _prefer_source(self, formats):
        try:
            source = next(f for f in formats if f['format_id'] == 'Source')
            source['quality'] = 10
        except StopIteration:
            for f in formats:
                if '/chunked/' in f['url']:
                    f.update({
                        'quality': 10,
                        'format_note': 'Source',
                    })
        self._sort_formats(formats)

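    # All metadata is fetched from the GraphQL endpoint at
    # https://gql.twitch.tv/gql, authenticated with the web player Client-ID
    # and, when available, the 'auth-token' cookie of a logged-in session.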
    def _download_base_gql(self, video_id, ops, note, fatal=True):
        headers = {
            'Content-Type': 'text/plain;charset=UTF-8',
            'Client-ID': self._CLIENT_ID,
        }
        gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
        if gql_auth:
            headers['Authorization'] = 'OAuth ' + gql_auth.value
        return self._download_json(
            'https://gql.twitch.tv/gql', video_id, note,
            data=json.dumps(ops).encode(),
            headers=headers, fatal=fatal)

    def _download_gql(self, video_id, ops, note, fatal=True):
        for op in ops:
            op['extensions'] = {
                'persistedQuery': {
                    'version': 1,
                    'sha256Hash': self._OPERATION_HASHES[op['operationName']],
                }
            }
        # pass fatal through so that non-fatal callers (e.g. paginated
        # playlist requests) are actually treated as non-fatal
        return self._download_base_gql(video_id, ops, note, fatal=fatal)

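    # Playback access tokens are requested with an inline (non-persisted)
    # GraphQL query; the returned value/signature pair signs the usher
    # playlist URLs (nauth/nauthsig for VODs, token/sig for live streams).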
    def _download_access_token(self, video_id, token_kind, param_name):
        method = '%sPlaybackAccessToken' % token_kind
        ops = {
            'query': '''{
              %s(
                %s: "%s",
                params: {
                  platform: "web",
                  playerBackend: "mediaplayer",
                  playerType: "site"
                }
              )
              {
                value
                signature
              }
            }''' % (method, param_name, video_id),
        }
        return self._download_base_gql(
            video_id, ops,
            'Downloading %s access token GraphQL' % token_kind)['data'][method]


class TwitchVodIE(TwitchBaseIE):
    IE_NAME = 'twitch:vod'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
                            player\.twitch\.tv/\?.*?\bvideo=v?
                        )
                        (?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
        'info_dict': {
            'id': 'v6528877',
            'ext': 'mp4',
            'title': 'LCK Summer Split - Week 6 Day 1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 17208,
            'timestamp': 1435131734,
            'upload_date': '20150624',
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'view_count': int,
            'start_time': 310,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Untitled broadcast (title is None)
        'url': 'http://www.twitch.tv/belkao_o/v/11230755',
        'info_dict': {
            'id': 'v11230755',
            'ext': 'mp4',
            'title': 'Untitled Broadcast',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 1638,
            'timestamp': 1439746708,
            'upload_date': '20150816',
            'uploader': 'BelkAO_o',
            'uploader_id': 'belkao_o',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'skip': 'HTTP Error 404: Not Found',
    }, {
        'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/6528877',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/northernlion/video/291940395',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?video=480452374',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/videos/635475444',
        'info_dict': {
            'id': 'v635475444',
            'ext': 'mp4',
            'title': 'Riot Games',
            'duration': 11643,
            'uploader': 'Riot Games',
            'uploader_id': 'riotgames',
            'timestamp': 1590770569,
            'upload_date': '20200529',
            'chapters': [
                {
                    'start_time': 0,
                    'end_time': 573,
                    'title': 'League of Legends'
                },
                {
                    'start_time': 573,
                    'end_time': 3922,
                    'title': 'Legends of Runeterra'
                },
                {
                    'start_time': 3922,
                    'end_time': 11643,
                    'title': 'Art'
                }
            ],
        },
        'params': {
            'skip_download': True
        }
    }]

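    # VideoMetadata and the chapter "moments" are fetched in a single batched
    # GraphQL request; both operations are keyed off the VOD id.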
    def _download_info(self, item_id):
        data = self._download_gql(
            item_id, [{
                'operationName': 'VideoMetadata',
                'variables': {
                    'channelLogin': '',
                    'videoID': item_id,
                },
            }, {
                'operationName': 'VideoPlayer_ChapterSelectButtonVideo',
                'variables': {
                    'includePrivate': False,
                    'videoID': item_id,
                },
            }],
            'Downloading stream metadata GraphQL')

        video = traverse_obj(data, (0, 'data', 'video'))
        # check for a missing video before touching it, otherwise the
        # assignment below raises TypeError instead of the intended error
        if video is None:
            raise ExtractorError(
                'Video %s does not exist' % item_id, expected=True)
        video['moments'] = traverse_obj(data, (1, 'data', 'video', 'moments', 'edges', ..., 'node'))

        return self._extract_info_gql(video, item_id)

    def _extract_info(self, info):
        status = info.get('status')
        if status == 'recording':
            is_live = True
        elif status == 'recorded':
            is_live = False
        else:
            is_live = None
        _QUALITIES = ('small', 'medium', 'large')
        quality_key = qualities(_QUALITIES)
        thumbnails = []
        preview = info.get('preview')
        if isinstance(preview, dict):
            for thumbnail_id, thumbnail_url in preview.items():
                thumbnail_url = url_or_none(thumbnail_url)
                if not thumbnail_url:
                    continue
                if thumbnail_id not in _QUALITIES:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'preference': quality_key(thumbnail_id),
                })
        return {
            'id': info['_id'],
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('length')),
            'thumbnails': thumbnails,
            'uploader': info.get('channel', {}).get('display_name'),
            'uploader_id': info.get('channel', {}).get('name'),
            'timestamp': parse_iso8601(info.get('recorded_at')),
            'view_count': int_or_none(info.get('views')),
            'is_live': is_live,
            'was_live': True,
        }

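    # 'moments' (the chapter markers of a VOD) carry millisecond offsets;
    # they are converted to second-based start/end chapter entries below.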
    def _extract_moments(self, info, item_id):
        for moment in info.get('moments') or []:
            start_time = int_or_none(moment.get('positionMilliseconds'), 1000)
            duration = int_or_none(moment.get('durationMilliseconds'), 1000)
            name = str_or_none(moment.get('description'))

            if start_time is None or duration is None:
                self.report_warning(f'Important chapter information missing for chapter {name}', item_id)
                continue
            yield {
                'start_time': start_time,
                'end_time': start_time + duration,
                'title': name,
            }

    def _extract_info_gql(self, info, item_id):
        vod_id = info.get('id') or item_id
        # id backward compatibility for download archives
        if vod_id[0] != 'v':
            vod_id = 'v%s' % vod_id
        thumbnail = url_or_none(info.get('previewThumbnailURL'))
        is_live = None
        if thumbnail:
            if thumbnail.endswith('/404_processing_{width}x{height}.png'):
                is_live, thumbnail = True, None
            else:
                is_live = False
                for p in ('width', 'height'):
                    thumbnail = thumbnail.replace('{%s}' % p, '0')

        return {
            'id': vod_id,
            'title': info.get('title') or 'Untitled Broadcast',
            'description': info.get('description'),
            'duration': int_or_none(info.get('lengthSeconds')),
            'thumbnail': thumbnail,
            'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
            'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
            'timestamp': unified_timestamp(info.get('publishedAt')),
            'view_count': int_or_none(info.get('viewCount')),
            'chapters': list(self._extract_moments(info, item_id)),
            'is_live': is_live,
            'was_live': True,
        }

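    # The VOD playlist comes from the usher endpoint, signed with the access
    # token fetched above; an illustrative URL shape (not a literal example):
    #   https://usher.ttvnw.net/vod/<vod_id>.m3u8?allow_source=true&...&nauth=<value>&nauthsig=<signature>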
    def _real_extract(self, url):
        vod_id = self._match_id(url)

        info = self._download_info(vod_id)
        access_token = self._download_access_token(vod_id, 'video', 'id')

        formats = self._extract_m3u8_formats(
            '%s/vod/%s.m3u8?%s' % (
                self._USHER_BASE, vod_id,
                compat_urllib_parse_urlencode({
                    'allow_source': 'true',
                    'allow_audio_only': 'true',
                    'allow_spectre': 'true',
                    'player': 'twitchweb',
                    'playlist_include_framerate': 'true',
                    'nauth': access_token['value'],
                    'nauthsig': access_token['signature'],
                })),
            vod_id, 'mp4', entry_protocol='m3u8_native')

        self._prefer_source(formats)
        info['formats'] = formats

        parsed_url = compat_urllib_parse_urlparse(url)
        query = compat_parse_qs(parsed_url.query)
        if 't' in query:
            info['start_time'] = parse_duration(query['t'][0])

        if info.get('timestamp') is not None:
            info['subtitles'] = {
                'rechat': [{
                    'url': update_url_query(
                        'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
                            'client_id': self._CLIENT_ID,
                        }),
                    'ext': 'json',
                }],
            }

        return info


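# Shared helper: wraps a GraphQL video node into a url_transparent entry that
# is resolved by TwitchVodIE, so the playlist extractors stay metadata-only.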
def _make_video_result(node):
    assert isinstance(node, dict)
    video_id = node.get('id')
    if not video_id:
        return
    return {
        '_type': 'url_transparent',
        'ie_key': TwitchVodIE.ie_key(),
        'id': 'v' + video_id,
        'url': 'https://www.twitch.tv/videos/%s' % video_id,
        'title': node.get('title'),
        'thumbnail': node.get('previewThumbnailURL'),
        'duration': float_or_none(node.get('lengthSeconds')),
        'view_count': int_or_none(node.get('viewCount')),
    }


class TwitchCollectionIE(TwitchBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'

    _TESTS = [{
        'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
        'info_dict': {
            'id': 'wlDCoH0zEBZZbQ',
            'title': 'Overthrow Nook, capitalism for children',
        },
        'playlist_mincount': 13,
    }]

    _OPERATION_NAME = 'CollectionSideBar'

    def _real_extract(self, url):
        collection_id = self._match_id(url)
        collection = self._download_gql(
            collection_id, [{
                'operationName': self._OPERATION_NAME,
                'variables': {'collectionID': collection_id},
            }],
            'Downloading collection GraphQL')[0]['data']['collection']
        title = collection.get('title')
        entries = []
        for edge in collection['items']['edges']:
            if not isinstance(edge, dict):
                continue
            node = edge.get('node')
            if not isinstance(node, dict):
                continue
            video = _make_video_result(node)
            if video:
                entries.append(video)
        return self.playlist_result(
            entries, playlist_id=collection_id, playlist_title=title)


class TwitchPlaylistBaseIE(TwitchBaseIE):
    _PAGE_LIMIT = 100

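    # Cursor-based pagination: each GraphQL page returns up to _PAGE_LIMIT
    # edges, the cursor of the last yielded edge feeds the next request, and
    # iteration stops once a page has no edges or no usable cursor.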
    def _entries(self, channel_name, *args):
        cursor = None
        variables_common = self._make_variables(channel_name, *args)
        entries_key = '%ss' % self._ENTRY_KIND
        for page_num in itertools.count(1):
            variables = variables_common.copy()
            variables['limit'] = self._PAGE_LIMIT
            if cursor:
                variables['cursor'] = cursor
            page = self._download_gql(
                channel_name, [{
                    'operationName': self._OPERATION_NAME,
                    'variables': variables,
                }],
                'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
                fatal=False)
            if not page:
                break
            edges = try_get(
                page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
            if not edges:
                break
            for edge in edges:
                if not isinstance(edge, dict):
                    continue
                if edge.get('__typename') != self._EDGE_KIND:
                    continue
                node = edge.get('node')
                if not isinstance(node, dict):
                    continue
                if node.get('__typename') != self._NODE_KIND:
                    continue
                entry = self._extract_entry(node)
                if entry:
                    cursor = edge.get('cursor')
                    yield entry
            if not cursor or not isinstance(cursor, compat_str):
                break


class TwitchVideosIE(TwitchPlaylistBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'

    _TESTS = [{
        # All Videos sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Date',
        },
        'playlist_mincount': 924,
    }, {
        # All Videos sorted by Popular
        'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - All Videos sorted by Popular',
        },
        'playlist_mincount': 931,
    }, {
        # Past Broadcasts sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Broadcasts sorted by Date',
        },
        'playlist_mincount': 27,
    }, {
        # Highlights sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Highlights sorted by Date',
        },
        'playlist_mincount': 901,
    }, {
        # Uploads sorted by Date
        'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
        'info_dict': {
            'id': 'esl_csgo',
            'title': 'esl_csgo - Uploads sorted by Date',
        },
        'playlist_mincount': 5,
    }, {
        # Past Premieres sorted by Date
        'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Past Premieres sorted by Date',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/spamfish/videos/all',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/spamfish/videos',
        'only_matching': True,
    }]

    Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])

    _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
    _BROADCASTS = {
        'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
        'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
        'uploads': Broadcast('UPLOAD', 'Uploads'),
        'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
        'all': _DEFAULT_BROADCAST,
    }

    _DEFAULT_SORTED_BY = 'Date'
    _SORTED_BY = {
        'time': _DEFAULT_SORTED_BY,
        'views': 'Popular',
    }

    _OPERATION_NAME = 'FilterableVideoTower_Videos'
    _ENTRY_KIND = 'video'
    _EDGE_KIND = 'VideoEdge'
    _NODE_KIND = 'Video'

    @classmethod
    def suitable(cls, url):
        return (False
                if any(ie.suitable(url) for ie in (
                    TwitchVideosClipsIE,
                    TwitchVideosCollectionsIE))
                else super(TwitchVideosIE, cls).suitable(url))

    @staticmethod
    def _make_variables(channel_name, broadcast_type, sort):
        return {
            'channelOwnerLogin': channel_name,
            'broadcastType': broadcast_type,
            'videoSort': sort.upper(),
        }

    @staticmethod
    def _extract_entry(node):
        return _make_video_result(node)

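    # The URL query parameters map onto GraphQL variables: ?filter= selects a
    # broadcastType (defaulting to All Videos) and ?sort= selects videoSort
    # (time -> Date, views -> Popular).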
    def _real_extract(self, url):
        channel_name = self._match_id(url)
        qs = parse_qs(url)
        filter = qs.get('filter', ['all'])[0]
        sort = qs.get('sort', ['time'])[0]
        broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
        return self.playlist_result(
            self._entries(channel_name, broadcast.type, sort),
            playlist_id=channel_name,
            playlist_title='%s - %s sorted by %s'
            % (channel_name, broadcast.label,
               self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))


class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'

    _TESTS = [{
        # Clips
        'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
        'info_dict': {
            'id': 'vanillatv',
            'title': 'vanillatv - Clips Top All',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
        'only_matching': True,
    }]

    Clip = collections.namedtuple('Clip', ['filter', 'label'])

    _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
    _RANGE = {
        '24hr': Clip('LAST_DAY', 'Top 24H'),
        '7d': _DEFAULT_CLIP,
        '30d': Clip('LAST_MONTH', 'Top 30D'),
        'all': Clip('ALL_TIME', 'Top All'),
    }

    # NB: values other than 20 result in skipped videos
    _PAGE_LIMIT = 20

    _OPERATION_NAME = 'ClipsCards__User'
    _ENTRY_KIND = 'clip'
    _EDGE_KIND = 'ClipEdge'
    _NODE_KIND = 'Clip'

    @staticmethod
    def _make_variables(channel_name, filter):
        return {
            'login': channel_name,
            'criteria': {
                'filter': filter,
            },
        }

    @staticmethod
    def _extract_entry(node):
        assert isinstance(node, dict)
        clip_url = url_or_none(node.get('url'))
        if not clip_url:
            return
        return {
            '_type': 'url_transparent',
            'ie_key': TwitchClipsIE.ie_key(),
            'id': node.get('id'),
            'url': clip_url,
            'title': node.get('title'),
            'thumbnail': node.get('thumbnailURL'),
            'duration': float_or_none(node.get('durationSeconds')),
            'timestamp': unified_timestamp(node.get('createdAt')),
            'view_count': int_or_none(node.get('viewCount')),
            'language': node.get('language'),
        }

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        qs = parse_qs(url)
        range = qs.get('range', ['7d'])[0]
        clip = self._RANGE.get(range, self._DEFAULT_CLIP)
        return self.playlist_result(
            self._entries(channel_name, clip.filter),
            playlist_id=channel_name,
            playlist_title='%s - Clips %s' % (channel_name, clip.label))


class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
    _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'

    _TESTS = [{
        # Collections
        'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
        'info_dict': {
            'id': 'spamfish',
            'title': 'spamfish - Collections',
        },
        'playlist_mincount': 3,
    }]

    _OPERATION_NAME = 'ChannelCollectionsContent'
    _ENTRY_KIND = 'collection'
    _EDGE_KIND = 'CollectionsItemEdge'
    _NODE_KIND = 'Collection'

    @staticmethod
    def _make_variables(channel_name):
        return {
            'ownerLogin': channel_name,
        }

    @staticmethod
    def _extract_entry(node):
        assert isinstance(node, dict)
        collection_id = node.get('id')
        if not collection_id:
            return
        return {
            '_type': 'url_transparent',
            'ie_key': TwitchCollectionIE.ie_key(),
            'id': collection_id,
            'url': 'https://www.twitch.tv/collections/%s' % collection_id,
            'title': node.get('title'),
            'thumbnail': node.get('thumbnailURL'),
            'duration': float_or_none(node.get('lengthSeconds')),
            'timestamp': unified_timestamp(node.get('updatedAt')),
            'view_count': int_or_none(node.get('viewCount')),
        }

    def _real_extract(self, url):
        channel_name = self._match_id(url)
        return self.playlist_result(
            self._entries(channel_name), playlist_id=channel_name,
            playlist_title='%s - Collections' % channel_name)


class TwitchStreamIE(TwitchBaseIE):
    IE_NAME = 'twitch:stream'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?:www|go|m)\.)?twitch\.tv/|
                            player\.twitch\.tv/\?.*?\bchannel=
                        )
                        (?P<id>[^/#?]+)
                    '''

    _TESTS = [{
        'url': 'http://www.twitch.tv/shroomztv',
        'info_dict': {
            'id': '12772022048',
            'display_id': 'shroomztv',
            'ext': 'mp4',
            'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
            'is_live': True,
            'timestamp': 1421928037,
            'upload_date': '20150122',
            'uploader': 'ShroomzTV',
            'uploader_id': 'shroomztv',
            'view_count': int,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.twitch.tv/miracle_doto#profile-0',
        'only_matching': True,
    }, {
        'url': 'https://player.twitch.tv/?channel=lotsofs',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/food',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/food',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        return (False
                if any(ie.suitable(url) for ie in (
                    TwitchVodIE,
                    TwitchCollectionIE,
                    TwitchVideosIE,
                    TwitchVideosClipsIE,
                    TwitchVideosCollectionsIE,
                    TwitchClipsIE))
                else super(TwitchStreamIE, cls).suitable(url))

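    # Three GraphQL operations are batched per request: StreamMetadata (the
    # stream object), ComscoreStreamingQuery (uploader and title) and
    # VideoPreviewOverlay (thumbnail). The live playlist is then fetched from
    # usher, roughly (illustrative shape only):
    #   https://usher.ttvnw.net/api/channel/hls/<channel>.m3u8?sig=<signature>&token=<value>&...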
    def _real_extract(self, url):
        channel_name = self._match_id(url).lower()

        gql = self._download_gql(
            channel_name, [{
                'operationName': 'StreamMetadata',
                'variables': {'channelLogin': channel_name},
            }, {
                'operationName': 'ComscoreStreamingQuery',
                'variables': {
                    'channel': channel_name,
                    'clipSlug': '',
                    'isClip': False,
                    'isLive': True,
                    'isVodOrCollection': False,
                    'vodID': '',
                },
            }, {
                'operationName': 'VideoPreviewOverlay',
                'variables': {'login': channel_name},
            }],
            'Downloading stream GraphQL')

        user = gql[0]['data']['user']

        if not user:
            raise ExtractorError(
                '%s does not exist' % channel_name, expected=True)

        stream = user['stream']

        if not stream:
            raise ExtractorError('%s is offline' % channel_name, expected=True)

        access_token = self._download_access_token(
            channel_name, 'stream', 'channelName')
        token = access_token['value']

        stream_id = stream.get('id') or channel_name
        query = {
            'allow_source': 'true',
            'allow_audio_only': 'true',
            'allow_spectre': 'true',
            'p': random.randint(1000000, 10000000),
            'player': 'twitchweb',
            'playlist_include_framerate': 'true',
            'segment_preference': '4',
            'sig': access_token['signature'].encode('utf-8'),
            'token': token.encode('utf-8'),
        }
        formats = self._extract_m3u8_formats(
            '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
            stream_id, 'mp4', query=query)
        self._prefer_source(formats)

        view_count = stream.get('viewers')
        timestamp = unified_timestamp(stream.get('createdAt'))

        sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
        uploader = sq_user.get('displayName')
        description = try_get(
            sq_user, lambda x: x['broadcastSettings']['title'], compat_str)

        thumbnail = url_or_none(try_get(
            gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
            compat_str))

        title = uploader or channel_name
        stream_type = stream.get('type')
        if stream_type in ['rerun', 'live']:
            title += ' (%s)' % stream_type

        return {
            'id': stream_id,
            'display_id': channel_name,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'uploader_id': channel_name,
            'timestamp': timestamp,
            'view_count': view_count,
            'formats': formats,
            'is_live': stream_type == 'live',
        }


class TwitchClipsIE(TwitchBaseIE):
    IE_NAME = 'twitch:clips'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
                            (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
                        )
                        (?P<id>[^/?#&]+)
                    '''

    _TESTS = [{
        'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
        'md5': '761769e1eafce0ffebfb4089cb3847cd',
        'info_dict': {
            'id': '42850523',
            'display_id': 'FaintLightGullWholeWheat',
            'ext': 'mp4',
            'title': 'EA Play 2016 Live from the Novo Theatre',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1465767393,
            'upload_date': '20160612',
            'creator': 'EA',
            'uploader': 'stereotype_',
            'uploader_id': '43566419',
        },
    }, {
        # multiple formats
        'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
        'only_matching': True,
    }, {
        'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
        'only_matching': True,
    }, {
        'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
        'only_matching': True,
    }, {
        'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }, {
        'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
        'only_matching': True,
    }]

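    # Clips use two requests: the persisted VideoAccessToken_Clip query for
    # the sig/token pair, then a raw GraphQL query for the clip metadata and
    # the list of videoQualities from which the formats are built.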
    def _real_extract(self, url):
        video_id = self._match_id(url)

        clip = self._download_gql(
            video_id, [{
                'operationName': 'VideoAccessToken_Clip',
                'variables': {
                    'slug': video_id,
                },
            }],
            'Downloading clip access token GraphQL')[0]['data']['clip']

        if not clip:
            raise ExtractorError(
                'This clip is no longer available', expected=True)

        access_query = {
            'sig': clip['playbackAccessToken']['signature'],
            'token': clip['playbackAccessToken']['value'],
        }

        data = self._download_base_gql(
            video_id, {
                'query': '''{
  clip(slug: "%s") {
    broadcaster {
      displayName
    }
    createdAt
    curator {
      displayName
      id
    }
    durationSeconds
    id
    tiny: thumbnailURL(width: 86, height: 45)
    small: thumbnailURL(width: 260, height: 147)
    medium: thumbnailURL(width: 480, height: 272)
    title
    videoQualities {
      frameRate
      quality
      sourceURL
    }
    viewCount
  }
}''' % video_id}, 'Downloading clip GraphQL', fatal=False)

        if data:
            clip = try_get(data, lambda x: x['data']['clip'], dict) or clip

        formats = []
        for option in clip.get('videoQualities', []):
            if not isinstance(option, dict):
                continue
            source = url_or_none(option.get('sourceURL'))
            if not source:
                continue
            formats.append({
                'url': update_url_query(source, access_query),
                'format_id': option.get('quality'),
                'height': int_or_none(option.get('quality')),
                'fps': int_or_none(option.get('frameRate')),
            })
        self._sort_formats(formats)

        thumbnails = []
        for thumbnail_id in ('tiny', 'small', 'medium'):
            thumbnail_url = clip.get(thumbnail_id)
            if not thumbnail_url:
                continue
            thumb = {
                'id': thumbnail_id,
                'url': thumbnail_url,
            }
            mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
            if mobj:
                thumb.update({
                    'height': int(mobj.group(2)),
                    'width': int(mobj.group(1)),
                })
            thumbnails.append(thumb)

        return {
            'id': clip.get('id') or video_id,
            'display_id': video_id,
            'title': clip.get('title') or video_id,
            'formats': formats,
            'duration': int_or_none(clip.get('durationSeconds')),
            'view_count': int_or_none(clip.get('viewCount')),
            'timestamp': unified_timestamp(clip.get('createdAt')),
            'thumbnails': thumbnails,
            'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
            'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
            'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
        }