yt_dlp/extractor/twitch.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import collections
   5 import itertools
   6 import json
   7 import random
   8 import re
   9
  10 from .common import InfoExtractor
  11 from ..compat import (
  12     compat_parse_qs,
  13     compat_str,
  14     compat_urlparse,
  15     compat_urllib_parse_urlencode,
  16     compat_urllib_parse_urlparse,
  17 )
  18 from ..utils import (
  19     clean_html,
  20     dict_get,
  21     ExtractorError,
  22     float_or_none,
  23     int_or_none,
  24     parse_duration,
  25     parse_iso8601,
  26     qualities,
  27     try_get,
  28     unified_timestamp,
  29     update_url_query,
  30     url_or_none,
  31     urljoin,
  32 )
  33
  34
  35 class TwitchBaseIE(InfoExtractor):
  36     _VALID_URL_BASE = r'https?://(?:(?:www|go|m)\.)?twitch\.tv'
  37
  38     _API_BASE = 'https://api.twitch.tv'
  39     _USHER_BASE = 'https://usher.ttvnw.net'
  40     _LOGIN_FORM_URL = 'https://www.twitch.tv/login'
  41     _LOGIN_POST_URL = 'https://passport.twitch.tv/login'
  42     _CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'
  43     _NETRC_MACHINE = 'twitch'
  44
  45     _OPERATION_HASHES = {
  46         'CollectionSideBar': '27111f1b382effad0b6def325caef1909c733fe6a4fbabf54f8d491ef2cf2f14',
  47         'FilterableVideoTower_Videos': 'a937f1d22e269e39a03b509f65a7490f9fc247d7f83d6ac1421523e3b68042cb',
  48         'ClipsCards__User': 'b73ad2bfaecfd30a9e6c28fada15bd97032c83ec77a0440766a56fe0bd632777',
  49         'ChannelCollectionsContent': '07e3691a1bad77a36aba590c351180439a40baefc1c275356f40fc7082419a84',
  50         'StreamMetadata': '1c719a40e481453e5c48d9bb585d971b8b372f8ebb105b17076722264dfa5b3e',
  51         'ComscoreStreamingQuery': 'e1edae8122517d013405f237ffcc124515dc6ded82480a88daef69c83b53ac01',
  52         'VideoAccessToken_Clip': '36b89d2507fce29e5ca551df756d27c1cfe079e2609642b4390aa4c35796eb11',
  53         'VideoPreviewOverlay': '3006e77e51b128d838fa4e835723ca4dc9a05c5efd4466c1085215c6e437e65c',
  54         'VideoMetadata': '226edb3e692509f727fd56821f5653c05740242c82b0388883e0c0e75dcbf687',
  55     }
  56
  57     def _real_initialize(self):
  58         self._login()
  59
  60     def _login(self):
  61         username, password = self._get_login_info()
  62         if username is None:
  63             return
  64
  65         def fail(message):
  66             raise ExtractorError(
  67                 'Unable to login. Twitch said: %s' % message, expected=True)
  68
  69         def login_step(page, urlh, note, data):
  70             form = self._hidden_inputs(page)
  71             form.update(data)
  72
  73             page_url = urlh.geturl()
  74             post_url = self._search_regex(
  75                 r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
  76                 'post url', default=self._LOGIN_POST_URL, group='url')
  77             post_url = urljoin(page_url, post_url)
  78
  79             headers = {
  80                 'Referer': page_url,
  81                 'Origin': 'https://www.twitch.tv',
  82                 'Content-Type': 'text/plain;charset=UTF-8',
  83             }
  84
  85             response = self._download_json(
  86                 post_url, None, note, data=json.dumps(form).encode(),
  87                 headers=headers, expected_status=400)
  88             error = dict_get(response, ('error', 'error_description', 'error_code'))
  89             if error:
  90                 fail(error)
  91
  92             if 'Authenticated successfully' in response.get('message', ''):
  93                 return None, None
  94
  95             redirect_url = urljoin(
  96                 post_url,
  97                 response.get('redirect') or response['redirect_path'])
  98             return self._download_webpage_handle(
  99                 redirect_url, None, 'Downloading login redirect page',
 100                 headers=headers)
 101
 102         login_page, handle = self._download_webpage_handle(
 103             self._LOGIN_FORM_URL, None, 'Downloading login page')
 104
 105         # Some TOR nodes and public proxies are blocked completely
 106         if 'blacklist_message' in login_page:
 107             fail(clean_html(login_page))
 108
 109         redirect_page, handle = login_step(
 110             login_page, handle, 'Logging in', {
 111                 'username': username,
 112                 'password': password,
 113                 'client_id': self._CLIENT_ID,
 114             })
 115
 116         # Successful login
 117         if not redirect_page:
 118             return
 119
 120         if re.search(r'(?i)<form[^>]+id="two-factor-submit"', redirect_page) is not None:
 121             # TODO: Add mechanism to request an SMS or phone call
 122             tfa_token = self._get_tfa_info('two-factor authentication token')
 123             login_step(redirect_page, handle, 'Submitting TFA token', {
 124                 'authy_token': tfa_token,
 125                 'remember_2fa': 'true',
 126             })
 127
 128     def _prefer_source(self, formats):
 129         try:
 130             source = next(f for f in formats if f['format_id'] == 'Source')
 131             source['quality'] = 10
 132         except StopIteration:
 133             for f in formats:
 134                 if '/chunked/' in f['url']:
 135                     f.update({
 136                         'quality': 10,
 137                         'format_note': 'Source',
 138                     })
 139         self._sort_formats(formats)
 140
 141     def _download_base_gql(self, video_id, ops, note, fatal=True):
 142         headers = {
 143             'Content-Type': 'text/plain;charset=UTF-8',
 144             'Client-ID': self._CLIENT_ID,
 145         }
 146         gql_auth = self._get_cookies('https://gql.twitch.tv').get('auth-token')
 147         if gql_auth:
 148             headers['Authorization'] = 'OAuth ' + gql_auth.value
 149         return self._download_json(
 150             'https://gql.twitch.tv/gql', video_id, note,
 151             data=json.dumps(ops).encode(),
 152             headers=headers, fatal=fatal)
 153
 154     def _download_gql(self, video_id, ops, note, fatal=True):
 155         for op in ops:
 156             op['extensions'] = {
 157                 'persistedQuery': {
 158                     'version': 1,
 159                     'sha256Hash': self._OPERATION_HASHES[op['operationName']],
 160                 }
 161             }
 162         return self._download_base_gql(video_id, ops, note)
 163
 164     def _download_access_token(self, video_id, token_kind, param_name):
 165         method = '%sPlaybackAccessToken' % token_kind
 166         ops = {
 167             'query': '''{
 168               %s(
 169                 %s: "%s",
 170                 params: {
 171                   platform: "web",
 172                   playerBackend: "mediaplayer",
 173                   playerType: "site"
 174                 }
 175               )
 176               {
 177                 value
 178                 signature
 179               }
 180             }''' % (method, param_name, video_id),
 181         }
 182         return self._download_base_gql(
 183             video_id, ops,
 184             'Downloading %s access token GraphQL' % token_kind)['data'][method]
 185
 186
 187 class TwitchVodIE(TwitchBaseIE):
 188     IE_NAME = 'twitch:vod'
 189     _VALID_URL = r'''(?x)
 190                     https?://
 191                         (?:
 192                             (?:(?:www|go|m)\.)?twitch\.tv/(?:[^/]+/v(?:ideo)?|videos)/|
 193                             player\.twitch\.tv/\?.*?\bvideo=v?
 194                         )
 195                         (?P<id>\d+)
 196                     '''
 197
 198     _TESTS = [{
 199         'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s',
 200         'info_dict': {
 201             'id': 'v6528877',
 202             'ext': 'mp4',
 203             'title': 'LCK Summer Split - Week 6 Day 1',
 204             'thumbnail': r're:^https?://.*\.jpg$',
 205             'duration': 17208,
 206             'timestamp': 1435131734,
 207             'upload_date': '20150624',
 208             'uploader': 'Riot Games',
 209             'uploader_id': 'riotgames',
 210             'view_count': int,
 211             'start_time': 310,
 212         },
 213         'params': {
 214             # m3u8 download
 215             'skip_download': True,
 216         },
 217     }, {
 218         # Untitled broadcast (title is None)
 219         'url': 'http://www.twitch.tv/belkao_o/v/11230755',
 220         'info_dict': {
 221             'id': 'v11230755',
 222             'ext': 'mp4',
 223             'title': 'Untitled Broadcast',
 224             'thumbnail': r're:^https?://.*\.jpg$',
 225             'duration': 1638,
 226             'timestamp': 1439746708,
 227             'upload_date': '20150816',
 228             'uploader': 'BelkAO_o',
 229             'uploader_id': 'belkao_o',
 230             'view_count': int,
 231         },
 232         'params': {
 233             # m3u8 download
 234             'skip_download': True,
 235         },
 236         'skip': 'HTTP Error 404: Not Found',
 237     }, {
 238         'url': 'http://player.twitch.tv/?t=5m10s&video=v6528877',
 239         'only_matching': True,
 240     }, {
 241         'url': 'https://www.twitch.tv/videos/6528877',
 242         'only_matching': True,
 243     }, {
 244         'url': 'https://m.twitch.tv/beagsandjam/v/247478721',
 245         'only_matching': True,
 246     }, {
 247         'url': 'https://www.twitch.tv/northernlion/video/291940395',
 248         'only_matching': True,
 249     }, {
 250         'url': 'https://player.twitch.tv/?video=480452374',
 251         'only_matching': True,
 252     }]
 253
 254     def _download_info(self, item_id):
 255         data = self._download_gql(
 256             item_id, [{
 257                 'operationName': 'VideoMetadata',
 258                 'variables': {
 259                     'channelLogin': '',
 260                     'videoID': item_id,
 261                 },
 262             }],
 263             'Downloading stream metadata GraphQL')[0]['data']
 264         video = data.get('video')
 265         if video is None:
 266             raise ExtractorError(
 267                 'Video %s does not exist' % item_id, expected=True)
 268         return self._extract_info_gql(video, item_id)
 269
 270     @staticmethod
 271     def _extract_info(info):
 272         status = info.get('status')
 273         if status == 'recording':
 274             is_live = True
 275         elif status == 'recorded':
 276             is_live = False
 277         else:
 278             is_live = None
 279         _QUALITIES = ('small', 'medium', 'large')
 280         quality_key = qualities(_QUALITIES)
 281         thumbnails = []
 282         preview = info.get('preview')
 283         if isinstance(preview, dict):
 284             for thumbnail_id, thumbnail_url in preview.items():
 285                 thumbnail_url = url_or_none(thumbnail_url)
 286                 if not thumbnail_url:
 287                     continue
 288                 if thumbnail_id not in _QUALITIES:
 289                     continue
 290                 thumbnails.append({
 291                     'url': thumbnail_url,
 292                     'preference': quality_key(thumbnail_id),
 293                 })
 294         return {
 295             'id': info['_id'],
 296             'title': info.get('title') or 'Untitled Broadcast',
 297             'description': info.get('description'),
 298             'duration': int_or_none(info.get('length')),
 299             'thumbnails': thumbnails,
 300             'uploader': info.get('channel', {}).get('display_name'),
 301             'uploader_id': info.get('channel', {}).get('name'),
 302             'timestamp': parse_iso8601(info.get('recorded_at')),
 303             'view_count': int_or_none(info.get('views')),
 304             'is_live': is_live,
 305         }
 306
 307     @staticmethod
 308     def _extract_info_gql(info, item_id):
 309         vod_id = info.get('id') or item_id
 310         # id backward compatibility for download archives
 311         if vod_id[0] != 'v':
 312             vod_id = 'v%s' % vod_id
 313         thumbnail = url_or_none(info.get('previewThumbnailURL'))
 314         if thumbnail:
 315             for p in ('width', 'height'):
 316                 thumbnail = thumbnail.replace('{%s}' % p, '0')
 317         return {
 318             'id': vod_id,
 319             'title': info.get('title') or 'Untitled Broadcast',
 320             'description': info.get('description'),
 321             'duration': int_or_none(info.get('lengthSeconds')),
 322             'thumbnail': thumbnail,
 323             'uploader': try_get(info, lambda x: x['owner']['displayName'], compat_str),
 324             'uploader_id': try_get(info, lambda x: x['owner']['login'], compat_str),
 325             'timestamp': unified_timestamp(info.get('publishedAt')),
 326             'view_count': int_or_none(info.get('viewCount')),
 327         }
 328
 329     def _real_extract(self, url):
 330         vod_id = self._match_id(url)
 331
 332         info = self._download_info(vod_id)
 333         access_token = self._download_access_token(vod_id, 'video', 'id')
 334
 335         formats = self._extract_m3u8_formats(
 336             '%s/vod/%s.m3u8?%s' % (
 337                 self._USHER_BASE, vod_id,
 338                 compat_urllib_parse_urlencode({
 339                     'allow_source': 'true',
 340                     'allow_audio_only': 'true',
 341                     'allow_spectre': 'true',
 342                     'player': 'twitchweb',
 343                     'playlist_include_framerate': 'true',
 344                     'nauth': access_token['value'],
 345                     'nauthsig': access_token['signature'],
 346                 })),
 347             vod_id, 'mp4', entry_protocol='m3u8_native')
 348
 349         self._prefer_source(formats)
 350         info['formats'] = formats
 351
 352         parsed_url = compat_urllib_parse_urlparse(url)
 353         query = compat_parse_qs(parsed_url.query)
 354         if 't' in query:
 355             info['start_time'] = parse_duration(query['t'][0])
 356
 357         if info.get('timestamp') is not None:
 358             info['subtitles'] = {
 359                 'rechat': [{
 360                     'url': update_url_query(
 361                         'https://api.twitch.tv/v5/videos/%s/comments' % vod_id, {
 362                             'client_id': self._CLIENT_ID,
 363                         }),
 364                     'ext': 'json',
 365                 }],
 366             }
 367
 368         return info
 369
 370
 371 def _make_video_result(node):
 372     assert isinstance(node, dict)
 373     video_id = node.get('id')
 374     if not video_id:
 375         return
 376     return {
 377         '_type': 'url_transparent',
 378         'ie_key': TwitchVodIE.ie_key(),
 379         'id': 'v' + video_id,
 380         'url': 'https://www.twitch.tv/videos/%s' % video_id,
 381         'title': node.get('title'),
 382         'thumbnail': node.get('previewThumbnailURL'),
 383         'duration': float_or_none(node.get('lengthSeconds')),
 384         'view_count': int_or_none(node.get('viewCount')),
 385     }
 386
 387
 388 class TwitchCollectionIE(TwitchBaseIE):
 389     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/collections/(?P<id>[^/]+)'
 390
 391     _TESTS = [{
 392         'url': 'https://www.twitch.tv/collections/wlDCoH0zEBZZbQ',
 393         'info_dict': {
 394             'id': 'wlDCoH0zEBZZbQ',
 395             'title': 'Overthrow Nook, capitalism for children',
 396         },
 397         'playlist_mincount': 13,
 398     }]
 399
 400     _OPERATION_NAME = 'CollectionSideBar'
 401
 402     def _real_extract(self, url):
 403         collection_id = self._match_id(url)
 404         collection = self._download_gql(
 405             collection_id, [{
 406                 'operationName': self._OPERATION_NAME,
 407                 'variables': {'collectionID': collection_id},
 408             }],
 409             'Downloading collection GraphQL')[0]['data']['collection']
 410         title = collection.get('title')
 411         entries = []
 412         for edge in collection['items']['edges']:
 413             if not isinstance(edge, dict):
 414                 continue
 415             node = edge.get('node')
 416             if not isinstance(node, dict):
 417                 continue
 418             video = _make_video_result(node)
 419             if video:
 420                 entries.append(video)
 421         return self.playlist_result(
 422             entries, playlist_id=collection_id, playlist_title=title)
 423
 424
 425 class TwitchPlaylistBaseIE(TwitchBaseIE):
 426     _PAGE_LIMIT = 100
 427
 428     def _entries(self, channel_name, *args):
 429         cursor = None
 430         variables_common = self._make_variables(channel_name, *args)
 431         entries_key = '%ss' % self._ENTRY_KIND
 432         for page_num in itertools.count(1):
 433             variables = variables_common.copy()
 434             variables['limit'] = self._PAGE_LIMIT
 435             if cursor:
 436                 variables['cursor'] = cursor
 437             page = self._download_gql(
 438                 channel_name, [{
 439                     'operationName': self._OPERATION_NAME,
 440                     'variables': variables,
 441                 }],
 442                 'Downloading %ss GraphQL page %s' % (self._NODE_KIND, page_num),
 443                 fatal=False)
 444             if not page:
 445                 break
 446             edges = try_get(
 447                 page, lambda x: x[0]['data']['user'][entries_key]['edges'], list)
 448             if not edges:
 449                 break
 450             for edge in edges:
 451                 if not isinstance(edge, dict):
 452                     continue
 453                 if edge.get('__typename') != self._EDGE_KIND:
 454                     continue
 455                 node = edge.get('node')
 456                 if not isinstance(node, dict):
 457                     continue
 458                 if node.get('__typename') != self._NODE_KIND:
 459                     continue
 460                 entry = self._extract_entry(node)
 461                 if entry:
 462                     cursor = edge.get('cursor')
 463                     yield entry
 464             if not cursor or not isinstance(cursor, compat_str):
 465                 break
 466
 467
 468 class TwitchVideosIE(TwitchPlaylistBaseIE):
 469     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:videos|profile)'
 470
 471     _TESTS = [{
 472         # All Videos sorted by Date
 473         'url': 'https://www.twitch.tv/spamfish/videos?filter=all',
 474         'info_dict': {
 475             'id': 'spamfish',
 476             'title': 'spamfish - All Videos sorted by Date',
 477         },
 478         'playlist_mincount': 924,
 479     }, {
 480         # All Videos sorted by Popular
 481         'url': 'https://www.twitch.tv/spamfish/videos?filter=all&sort=views',
 482         'info_dict': {
 483             'id': 'spamfish',
 484             'title': 'spamfish - All Videos sorted by Popular',
 485         },
 486         'playlist_mincount': 931,
 487     }, {
 488         # Past Broadcasts sorted by Date
 489         'url': 'https://www.twitch.tv/spamfish/videos?filter=archives',
 490         'info_dict': {
 491             'id': 'spamfish',
 492             'title': 'spamfish - Past Broadcasts sorted by Date',
 493         },
 494         'playlist_mincount': 27,
 495     }, {
 496         # Highlights sorted by Date
 497         'url': 'https://www.twitch.tv/spamfish/videos?filter=highlights',
 498         'info_dict': {
 499             'id': 'spamfish',
 500             'title': 'spamfish - Highlights sorted by Date',
 501         },
 502         'playlist_mincount': 901,
 503     }, {
 504         # Uploads sorted by Date
 505         'url': 'https://www.twitch.tv/esl_csgo/videos?filter=uploads&sort=time',
 506         'info_dict': {
 507             'id': 'esl_csgo',
 508             'title': 'esl_csgo - Uploads sorted by Date',
 509         },
 510         'playlist_mincount': 5,
 511     }, {
 512         # Past Premieres sorted by Date
 513         'url': 'https://www.twitch.tv/spamfish/videos?filter=past_premieres',
 514         'info_dict': {
 515             'id': 'spamfish',
 516             'title': 'spamfish - Past Premieres sorted by Date',
 517         },
 518         'playlist_mincount': 1,
 519     }, {
 520         'url': 'https://www.twitch.tv/spamfish/videos/all',
 521         'only_matching': True,
 522     }, {
 523         'url': 'https://m.twitch.tv/spamfish/videos/all',
 524         'only_matching': True,
 525     }, {
 526         'url': 'https://www.twitch.tv/spamfish/videos',
 527         'only_matching': True,
 528     }]
 529
 530     Broadcast = collections.namedtuple('Broadcast', ['type', 'label'])
 531
 532     _DEFAULT_BROADCAST = Broadcast(None, 'All Videos')
 533     _BROADCASTS = {
 534         'archives': Broadcast('ARCHIVE', 'Past Broadcasts'),
 535         'highlights': Broadcast('HIGHLIGHT', 'Highlights'),
 536         'uploads': Broadcast('UPLOAD', 'Uploads'),
 537         'past_premieres': Broadcast('PAST_PREMIERE', 'Past Premieres'),
 538         'all': _DEFAULT_BROADCAST,
 539     }
 540
 541     _DEFAULT_SORTED_BY = 'Date'
 542     _SORTED_BY = {
 543         'time': _DEFAULT_SORTED_BY,
 544         'views': 'Popular',
 545     }
 546
 547     _OPERATION_NAME = 'FilterableVideoTower_Videos'
 548     _ENTRY_KIND = 'video'
 549     _EDGE_KIND = 'VideoEdge'
 550     _NODE_KIND = 'Video'
 551
 552     @classmethod
 553     def suitable(cls, url):
 554         return (False
 555                 if any(ie.suitable(url) for ie in (
 556                     TwitchVideosClipsIE,
 557                     TwitchVideosCollectionsIE))
 558                 else super(TwitchVideosIE, cls).suitable(url))
 559
 560     @staticmethod
 561     def _make_variables(channel_name, broadcast_type, sort):
 562         return {
 563             'channelOwnerLogin': channel_name,
 564             'broadcastType': broadcast_type,
 565             'videoSort': sort.upper(),
 566         }
 567
 568     @staticmethod
 569     def _extract_entry(node):
 570         return _make_video_result(node)
 571
 572     def _real_extract(self, url):
 573         channel_name = self._match_id(url)
 574         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 575         filter = qs.get('filter', ['all'])[0]
 576         sort = qs.get('sort', ['time'])[0]
 577         broadcast = self._BROADCASTS.get(filter, self._DEFAULT_BROADCAST)
 578         return self.playlist_result(
 579             self._entries(channel_name, broadcast.type, sort),
 580             playlist_id=channel_name,
 581             playlist_title='%s - %s sorted by %s'
 582             % (channel_name, broadcast.label,
 583                self._SORTED_BY.get(sort, self._DEFAULT_SORTED_BY)))
 584
 585
 586 class TwitchVideosClipsIE(TwitchPlaylistBaseIE):
 587     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/(?:clips|videos/*?\?.*?\bfilter=clips)'
 588
 589     _TESTS = [{
 590         # Clips
 591         'url': 'https://www.twitch.tv/vanillatv/clips?filter=clips&range=all',
 592         'info_dict': {
 593             'id': 'vanillatv',
 594             'title': 'vanillatv - Clips Top All',
 595         },
 596         'playlist_mincount': 1,
 597     }, {
 598         'url': 'https://www.twitch.tv/dota2ruhub/videos?filter=clips&range=7d',
 599         'only_matching': True,
 600     }]
 601
 602     Clip = collections.namedtuple('Clip', ['filter', 'label'])
 603
 604     _DEFAULT_CLIP = Clip('LAST_WEEK', 'Top 7D')
 605     _RANGE = {
 606         '24hr': Clip('LAST_DAY', 'Top 24H'),
 607         '7d': _DEFAULT_CLIP,
 608         '30d': Clip('LAST_MONTH', 'Top 30D'),
 609         'all': Clip('ALL_TIME', 'Top All'),
 610     }
 611
 612     # NB: values other than 20 result in skipped videos
 613     _PAGE_LIMIT = 20
 614
 615     _OPERATION_NAME = 'ClipsCards__User'
 616     _ENTRY_KIND = 'clip'
 617     _EDGE_KIND = 'ClipEdge'
 618     _NODE_KIND = 'Clip'
 619
 620     @staticmethod
 621     def _make_variables(channel_name, filter):
 622         return {
 623             'login': channel_name,
 624             'criteria': {
 625                 'filter': filter,
 626             },
 627         }
 628
 629     @staticmethod
 630     def _extract_entry(node):
 631         assert isinstance(node, dict)
 632         clip_url = url_or_none(node.get('url'))
 633         if not clip_url:
 634             return
 635         return {
 636             '_type': 'url_transparent',
 637             'ie_key': TwitchClipsIE.ie_key(),
 638             'id': node.get('id'),
 639             'url': clip_url,
 640             'title': node.get('title'),
 641             'thumbnail': node.get('thumbnailURL'),
 642             'duration': float_or_none(node.get('durationSeconds')),
 643             'timestamp': unified_timestamp(node.get('createdAt')),
 644             'view_count': int_or_none(node.get('viewCount')),
 645             'language': node.get('language'),
 646         }
 647
 648     def _real_extract(self, url):
 649         channel_name = self._match_id(url)
 650         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
 651         range = qs.get('range', ['7d'])[0]
 652         clip = self._RANGE.get(range, self._DEFAULT_CLIP)
 653         return self.playlist_result(
 654             self._entries(channel_name, clip.filter),
 655             playlist_id=channel_name,
 656             playlist_title='%s - Clips %s' % (channel_name, clip.label))
 657
 658
 659 class TwitchVideosCollectionsIE(TwitchPlaylistBaseIE):
 660     _VALID_URL = r'https?://(?:(?:www|go|m)\.)?twitch\.tv/(?P<id>[^/]+)/videos/*?\?.*?\bfilter=collections'
 661
 662     _TESTS = [{
 663         # Collections
 664         'url': 'https://www.twitch.tv/spamfish/videos?filter=collections',
 665         'info_dict': {
 666             'id': 'spamfish',
 667             'title': 'spamfish - Collections',
 668         },
 669         'playlist_mincount': 3,
 670     }]
 671
 672     _OPERATION_NAME = 'ChannelCollectionsContent'
 673     _ENTRY_KIND = 'collection'
 674     _EDGE_KIND = 'CollectionsItemEdge'
 675     _NODE_KIND = 'Collection'
 676
 677     @staticmethod
 678     def _make_variables(channel_name):
 679         return {
 680             'ownerLogin': channel_name,
 681         }
 682
 683     @staticmethod
 684     def _extract_entry(node):
 685         assert isinstance(node, dict)
 686         collection_id = node.get('id')
 687         if not collection_id:
 688             return
 689         return {
 690             '_type': 'url_transparent',
 691             'ie_key': TwitchCollectionIE.ie_key(),
 692             'id': collection_id,
 693             'url': 'https://www.twitch.tv/collections/%s' % collection_id,
 694             'title': node.get('title'),
 695             'thumbnail': node.get('thumbnailURL'),
 696             'duration': float_or_none(node.get('lengthSeconds')),
 697             'timestamp': unified_timestamp(node.get('updatedAt')),
 698             'view_count': int_or_none(node.get('viewCount')),
 699         }
 700
 701     def _real_extract(self, url):
 702         channel_name = self._match_id(url)
 703         return self.playlist_result(
 704             self._entries(channel_name), playlist_id=channel_name,
 705             playlist_title='%s - Collections' % channel_name)
 706
 707
 708 class TwitchStreamIE(TwitchBaseIE):
 709     IE_NAME = 'twitch:stream'
 710     _VALID_URL = r'''(?x)
 711                     https?://
 712                         (?:
 713                             (?:(?:www|go|m)\.)?twitch\.tv/|
 714                             player\.twitch\.tv/\?.*?\bchannel=
 715                         )
 716                         (?P<id>[^/#?]+)
 717                     '''
 718
 719     _TESTS = [{
 720         'url': 'http://www.twitch.tv/shroomztv',
 721         'info_dict': {
 722             'id': '12772022048',
 723             'display_id': 'shroomztv',
 724             'ext': 'mp4',
 725             'title': 're:^ShroomzTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
 726             'description': 'H1Z1 - lonewolfing with ShroomzTV | A3 Battle Royale later - @ShroomzTV',
 727             'is_live': True,
 728             'timestamp': 1421928037,
 729             'upload_date': '20150122',
 730             'uploader': 'ShroomzTV',
 731             'uploader_id': 'shroomztv',
 732             'view_count': int,
 733         },
 734         'params': {
 735             # m3u8 download
 736             'skip_download': True,
 737         },
 738     }, {
 739         'url': 'http://www.twitch.tv/miracle_doto#profile-0',
 740         'only_matching': True,
 741     }, {
 742         'url': 'https://player.twitch.tv/?channel=lotsofs',
 743         'only_matching': True,
 744     }, {
 745         'url': 'https://go.twitch.tv/food',
 746         'only_matching': True,
 747     }, {
 748         'url': 'https://m.twitch.tv/food',
 749         'only_matching': True,
 750     }]
 751
 752     @classmethod
 753     def suitable(cls, url):
 754         return (False
 755                 if any(ie.suitable(url) for ie in (
 756                     TwitchVodIE,
 757                     TwitchCollectionIE,
 758                     TwitchVideosIE,
 759                     TwitchVideosClipsIE,
 760                     TwitchVideosCollectionsIE,
 761                     TwitchClipsIE))
 762                 else super(TwitchStreamIE, cls).suitable(url))
 763
 764     def _real_extract(self, url):
 765         channel_name = self._match_id(url).lower()
 766
 767         gql = self._download_gql(
 768             channel_name, [{
 769                 'operationName': 'StreamMetadata',
 770                 'variables': {'channelLogin': channel_name},
 771             }, {
 772                 'operationName': 'ComscoreStreamingQuery',
 773                 'variables': {
 774                     'channel': channel_name,
 775                     'clipSlug': '',
 776                     'isClip': False,
 777                     'isLive': True,
 778                     'isVodOrCollection': False,
 779                     'vodID': '',
 780                 },
 781             }, {
 782                 'operationName': 'VideoPreviewOverlay',
 783                 'variables': {'login': channel_name},
 784             }],
 785             'Downloading stream GraphQL')
 786
 787         user = gql[0]['data']['user']
 788
 789         if not user:
 790             raise ExtractorError(
 791                 '%s does not exist' % channel_name, expected=True)
 792
 793         stream = user['stream']
 794
 795         if not stream:
 796             raise ExtractorError('%s is offline' % channel_name, expected=True)
 797
 798         access_token = self._download_access_token(
 799             channel_name, 'stream', 'channelName')
 800         token = access_token['value']
 801
 802         stream_id = stream.get('id') or channel_name
 803         query = {
 804             'allow_source': 'true',
 805             'allow_audio_only': 'true',
 806             'allow_spectre': 'true',
 807             'p': random.randint(1000000, 10000000),
 808             'player': 'twitchweb',
 809             'playlist_include_framerate': 'true',
 810             'segment_preference': '4',
 811             'sig': access_token['signature'].encode('utf-8'),
 812             'token': token.encode('utf-8'),
 813         }
 814         formats = self._extract_m3u8_formats(
 815             '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
 816             stream_id, 'mp4', query=query)
 817         self._prefer_source(formats)
 818
 819         view_count = stream.get('viewers')
 820         timestamp = unified_timestamp(stream.get('createdAt'))
 821
 822         sq_user = try_get(gql, lambda x: x[1]['data']['user'], dict) or {}
 823         uploader = sq_user.get('displayName')
 824         description = try_get(
 825             sq_user, lambda x: x['broadcastSettings']['title'], compat_str)
 826
 827         thumbnail = url_or_none(try_get(
 828             gql, lambda x: x[2]['data']['user']['stream']['previewImageURL'],
 829             compat_str))
 830
 831         title = uploader or channel_name
 832         stream_type = stream.get('type')
 833         if stream_type in ['rerun', 'live']:
 834             title += ' (%s)' % stream_type
 835
 836         return {
 837             'id': stream_id,
 838             'display_id': channel_name,
 839             'title': self._live_title(title),
 840             'description': description,
 841             'thumbnail': thumbnail,
 842             'uploader': uploader,
 843             'uploader_id': channel_name,
 844             'timestamp': timestamp,
 845             'view_count': view_count,
 846             'formats': formats,
 847             'is_live': stream_type == 'live',
 848         }
 849
 850
 851 class TwitchClipsIE(TwitchBaseIE):
 852     IE_NAME = 'twitch:clips'
 853     _VALID_URL = r'''(?x)
 854                     https?://
 855                         (?:
 856                             clips\.twitch\.tv/(?:embed\?.*?\bclip=|(?:[^/]+/)*)|
 857                             (?:(?:www|go|m)\.)?twitch\.tv/[^/]+/clip/
 858                         )
 859                         (?P<id>[^/?#&]+)
 860                     '''
 861
 862     _TESTS = [{
 863         'url': 'https://clips.twitch.tv/FaintLightGullWholeWheat',
 864         'md5': '761769e1eafce0ffebfb4089cb3847cd',
 865         'info_dict': {
 866             'id': '42850523',
 867             'ext': 'mp4',
 868             'title': 'EA Play 2016 Live from the Novo Theatre',
 869             'thumbnail': r're:^https?://.*\.jpg',
 870             'timestamp': 1465767393,
 871             'upload_date': '20160612',
 872             'creator': 'EA',
 873             'uploader': 'stereotype_',
 874             'uploader_id': '43566419',
 875         },
 876     }, {
 877         # multiple formats
 878         'url': 'https://clips.twitch.tv/rflegendary/UninterestedBeeDAESuppy',
 879         'only_matching': True,
 880     }, {
 881         'url': 'https://www.twitch.tv/sergeynixon/clip/StormyThankfulSproutFutureMan',
 882         'only_matching': True,
 883     }, {
 884         'url': 'https://clips.twitch.tv/embed?clip=InquisitiveBreakableYogurtJebaited',
 885         'only_matching': True,
 886     }, {
 887         'url': 'https://m.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
 888         'only_matching': True,
 889     }, {
 890         'url': 'https://go.twitch.tv/rossbroadcast/clip/ConfidentBraveHumanChefFrank',
 891         'only_matching': True,
 892     }]
 893
 894     def _real_extract(self, url):
 895         video_id = self._match_id(url)
 896
 897         clip = self._download_gql(
 898             video_id, [{
 899                 'operationName': 'VideoAccessToken_Clip',
 900                 'variables': {
 901                     'slug': video_id,
 902                 },
 903             }],
 904             'Downloading clip access token GraphQL')[0]['data']['clip']
 905
 906         if not clip:
 907             raise ExtractorError(
 908                 'This clip is no longer available', expected=True)
 909
 910         access_query = {
 911             'sig': clip['playbackAccessToken']['signature'],
 912             'token': clip['playbackAccessToken']['value'],
 913         }
 914
 915         data = self._download_base_gql(
 916             video_id, {
 917                 'query': '''{
 918   clip(slug: "%s") {
 919     broadcaster {
 920       displayName
 921     }
 922     createdAt
 923     curator {
 924       displayName
 925       id
 926     }
 927     durationSeconds
 928     id
 929     tiny: thumbnailURL(width: 86, height: 45)
 930     small: thumbnailURL(width: 260, height: 147)
 931     medium: thumbnailURL(width: 480, height: 272)
 932     title
 933     videoQualities {
 934       frameRate
 935       quality
 936       sourceURL
 937     }
 938     viewCount
 939   }
 940 }''' % video_id}, 'Downloading clip GraphQL', fatal=False)
 941
 942         if data:
 943             clip = try_get(data, lambda x: x['data']['clip'], dict) or clip
 944
 945         formats = []
 946         for option in clip.get('videoQualities', []):
 947             if not isinstance(option, dict):
 948                 continue
 949             source = url_or_none(option.get('sourceURL'))
 950             if not source:
 951                 continue
 952             formats.append({
 953                 'url': update_url_query(source, access_query),
 954                 'format_id': option.get('quality'),
 955                 'height': int_or_none(option.get('quality')),
 956                 'fps': int_or_none(option.get('frameRate')),
 957             })
 958         self._sort_formats(formats)
 959
 960         thumbnails = []
 961         for thumbnail_id in ('tiny', 'small', 'medium'):
 962             thumbnail_url = clip.get(thumbnail_id)
 963             if not thumbnail_url:
 964                 continue
 965             thumb = {
 966                 'id': thumbnail_id,
 967                 'url': thumbnail_url,
 968             }
 969             mobj = re.search(r'-(\d+)x(\d+)\.', thumbnail_url)
 970             if mobj:
 971                 thumb.update({
 972                     'height': int(mobj.group(2)),
 973                     'width': int(mobj.group(1)),
 974                 })
 975             thumbnails.append(thumb)
 976
 977         return {
 978             'id': clip.get('id') or video_id,
 979             'title': clip.get('title') or video_id,
 980             'formats': formats,
 981             'duration': int_or_none(clip.get('durationSeconds')),
 982             'views': int_or_none(clip.get('viewCount')),
 983             'timestamp': unified_timestamp(clip.get('createdAt')),
 984             'thumbnails': thumbnails,
 985             'creator': try_get(clip, lambda x: x['broadcaster']['displayName'], compat_str),
 986             'uploader': try_get(clip, lambda x: x['curator']['displayName'], compat_str),
 987             'uploader_id': try_get(clip, lambda x: x['curator']['id'], compat_str),
 988         }