yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from .periscope import PeriscopeBaseIE, PeriscopeIE
   7 from ..compat import functools  # isort: split
   8 from ..compat import (
   9     compat_parse_qs,
  10     compat_urllib_parse_unquote,
  11     compat_urllib_parse_urlparse,
  12 )
  13 from ..utils import (
  14     ExtractorError,
  15     dict_get,
  16     float_or_none,
  17     format_field,
  18     int_or_none,
  19     make_archive_id,
  20     str_or_none,
  21     strip_or_none,
  22     traverse_obj,
  23     try_call,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     xpath_text,
  29 )
  30
  31
  32 class TwitterBaseIE(InfoExtractor):
  33     _API_BASE = 'https://api.twitter.com/1.1/'
  34     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
  35     _TOKENS = {
  36         'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
  37         'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
  38     }
  39     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  40
  41     def _extract_variant_formats(self, variant, video_id):
  42         variant_url = variant.get('url')
  43         if not variant_url:
  44             return [], {}
  45         elif '.m3u8' in variant_url:
  46             return self._extract_m3u8_formats_and_subtitles(
  47                 variant_url, video_id, 'mp4', 'm3u8_native',
  48                 m3u8_id='hls', fatal=False)
  49         else:
  50             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
  51             f = {
  52                 'url': variant_url,
  53                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
  54                 'tbr': tbr,
  55             }
  56             self._search_dimensions_in_video_url(f, variant_url)
  57             return [f], {}
  58
  59     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
  60         vmap_url = url_or_none(vmap_url)
  61         if not vmap_url:
  62             return [], {}
  63         vmap_data = self._download_xml(vmap_url, video_id)
  64         formats = []
  65         subtitles = {}
  66         urls = []
  67         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
  68             video_variant.attrib['url'] = compat_urllib_parse_unquote(
  69                 video_variant.attrib['url'])
  70             urls.append(video_variant.attrib['url'])
  71             fmts, subs = self._extract_variant_formats(
  72                 video_variant.attrib, video_id)
  73             formats.extend(fmts)
  74             subtitles = self._merge_subtitles(subtitles, subs)
  75         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
  76         if video_url not in urls:
  77             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
  78             formats.extend(fmts)
  79             subtitles = self._merge_subtitles(subtitles, subs)
  80         return formats, subtitles
  81
  82     @staticmethod
  83     def _search_dimensions_in_video_url(a_format, video_url):
  84         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
  85         if m:
  86             a_format.update({
  87                 'width': int(m.group('width')),
  88                 'height': int(m.group('height')),
  89             })
  90
  91     @functools.cached_property
  92     def is_logged_in(self):
  93         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
  94
  95     def _call_api(self, path, video_id, query={}, graphql=False):
  96         cookies = self._get_cookies(self._API_BASE)
  97         headers = {}
  98
  99         csrf_cookie = cookies.get('ct0')
 100         if csrf_cookie:
 101             headers['x-csrf-token'] = csrf_cookie.value
 102
 103         if self.is_logged_in:
 104             headers.update({
 105                 'x-twitter-auth-type': 'OAuth2Session',
 106                 'x-twitter-client-language': 'en',
 107                 'x-twitter-active-user': 'yes',
 108             })
 109
 110         last_error = None
 111         for bearer_token in self._TOKENS:
 112             for first_attempt in (True, False):
 113                 headers['Authorization'] = f'Bearer {bearer_token}'
 114
 115                 if not self.is_logged_in:
 116                     if not self._TOKENS[bearer_token]:
 117                         headers.pop('x-guest-token', None)
 118                         guest_token_response = self._download_json(
 119                             self._API_BASE + 'guest/activate.json', video_id,
 120                             'Downloading guest token', data=b'', headers=headers)
 121
 122                         self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
 123                         if not self._TOKENS[bearer_token]:
 124                             raise ExtractorError('Could not retrieve guest token')
 125
 126                     headers['x-guest-token'] = self._TOKENS[bearer_token]
 127
 128                 try:
 129                     allowed_status = {400, 403, 404} if graphql else {403}
 130                     result = self._download_json(
 131                         (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 132                         video_id, headers=headers, query=query, expected_status=allowed_status)
 133
 134                 except ExtractorError as e:
 135                     if last_error:
 136                         raise last_error
 137
 138                     if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
 139                         raise
 140
 141                     last_error = e
 142                     self.report_warning(
 143                         'Twitter API gave 404 response, retrying with deprecated auth token. '
 144                         'Only one media item can be extracted')
 145                     break  # continue outer loop with next bearer_token
 146
 147                 if result.get('errors'):
 148                     errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
 149                     if first_attempt and any('bad guest token' in error.lower() for error in errors):
 150                         self.to_screen('Guest token has expired. Refreshing guest token')
 151                         self._TOKENS[bearer_token] = None
 152                         continue
 153
 154                     error_message = ', '.join(set(errors)) or 'Unknown error'
 155                     raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
 156
 157                 return result
 158
 159     def _build_graphql_query(self, media_id):
 160         raise NotImplementedError('Method must be implemented to support GraphQL')
 161
 162     def _call_graphql_api(self, endpoint, media_id):
 163         data = self._build_graphql_query(media_id)
 164         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 165         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 166
 167
 168 class TwitterCardIE(InfoExtractor):
 169     IE_NAME = 'twitter:card'
 170     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 171     _TESTS = [
 172         {
 173             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 174             # MD5 checksums are different in different places
 175             'info_dict': {
 176                 'id': '560070131976392705',
 177                 'ext': 'mp4',
 178                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 179                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 180                 'uploader': 'Twitter',
 181                 'uploader_id': 'Twitter',
 182                 'thumbnail': r're:^https?://.*\.jpg',
 183                 'duration': 30.033,
 184                 'timestamp': 1422366112,
 185                 'upload_date': '20150127',
 186                 'age_limit': 0,
 187                 'comment_count': int,
 188                 'tags': [],
 189                 'repost_count': int,
 190                 'like_count': int,
 191                 'display_id': '560070183650213889',
 192                 'uploader_url': 'https://twitter.com/Twitter',
 193             },
 194         },
 195         {
 196             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 197             'md5': '7137eca597f72b9abbe61e5ae0161399',
 198             'info_dict': {
 199                 'id': '623160978427936768',
 200                 'ext': 'mp4',
 201                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 202                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 203                 'uploader': 'NASA',
 204                 'uploader_id': 'NASA',
 205                 'timestamp': 1437408129,
 206                 'upload_date': '20150720',
 207                 'uploader_url': 'https://twitter.com/NASA',
 208                 'age_limit': 0,
 209                 'comment_count': int,
 210                 'like_count': int,
 211                 'repost_count': int,
 212                 'tags': ['PlutoFlyby'],
 213             },
 214             'params': {'format': '[protocol=https]'}
 215         },
 216         {
 217             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 218             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 219             'info_dict': {
 220                 'id': 'dq4Oj5quskI',
 221                 'ext': 'mp4',
 222                 'title': 'Ubuntu 11.10 Overview',
 223                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 224                 'upload_date': '20111013',
 225                 'uploader': 'OMG! UBUNTU!',
 226                 'uploader_id': 'omgubuntu',
 227                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 228                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 229                 'channel_follower_count': int,
 230                 'chapters': 'count:8',
 231                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 232                 'duration': 138,
 233                 'categories': ['Film & Animation'],
 234                 'age_limit': 0,
 235                 'comment_count': int,
 236                 'availability': 'public',
 237                 'like_count': int,
 238                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 239                 'view_count': int,
 240                 'tags': 'count:12',
 241                 'channel': 'OMG! UBUNTU!',
 242                 'playable_in_embed': True,
 243             },
 244             'add_ie': ['Youtube'],
 245         },
 246         {
 247             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 248             'info_dict': {
 249                 'id': 'iBb2x00UVlv',
 250                 'ext': 'mp4',
 251                 'upload_date': '20151113',
 252                 'uploader_id': '1189339351084113920',
 253                 'uploader': 'ArsenalTerje',
 254                 'title': 'Vine by ArsenalTerje',
 255                 'timestamp': 1447451307,
 256                 'alt_title': 'Vine by ArsenalTerje',
 257                 'comment_count': int,
 258                 'like_count': int,
 259                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 260                 'view_count': int,
 261                 'repost_count': int,
 262             },
 263             'add_ie': ['Vine'],
 264             'params': {'skip_download': 'm3u8'},
 265         },
 266         {
 267             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 268             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 269             'info_dict': {
 270                 'id': '705235433198714880',
 271                 'ext': 'mp4',
 272                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 273                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 274                 'uploader': 'Brent Yarina',
 275                 'uploader_id': 'BTNBrentYarina',
 276                 'timestamp': 1456976204,
 277                 'upload_date': '20160303',
 278             },
 279             'skip': 'This content is no longer available.',
 280         },
 281         {
 282             'url': 'https://twitter.com/i/videos/752274308186120192',
 283             'only_matching': True,
 284         },
 285     ]
 286
 287     def _real_extract(self, url):
 288         status_id = self._match_id(url)
 289         return self.url_result(
 290             'https://twitter.com/statuses/' + status_id,
 291             TwitterIE.ie_key(), status_id)
 292
 293
 294 class TwitterIE(TwitterBaseIE):
 295     IE_NAME = 'twitter'
 296     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?'
 297
 298     _TESTS = [{
 299         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 300         'info_dict': {
 301             'id': '643211870443208704',
 302             'display_id': '643211948184596480',
 303             'ext': 'mp4',
 304             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 305             'thumbnail': r're:^https?://.*\.jpg',
 306             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 307             'uploader': 'FREE THE NIPPLE',
 308             'uploader_id': 'freethenipple',
 309             'duration': 12.922,
 310             'timestamp': 1442188653,
 311             'upload_date': '20150913',
 312             'uploader_url': 'https://twitter.com/freethenipple',
 313             'comment_count': int,
 314             'repost_count': int,
 315             'like_count': int,
 316             'tags': [],
 317             'age_limit': 18,
 318         },
 319     }, {
 320         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 321         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 322         'info_dict': {
 323             'id': '657991469417025536',
 324             'ext': 'mp4',
 325             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 326             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 327             'thumbnail': r're:^https?://.*\.png',
 328             'uploader': 'Gifs',
 329             'uploader_id': 'giphz',
 330         },
 331         'expected_warnings': ['height', 'width'],
 332         'skip': 'Account suspended',
 333     }, {
 334         'url': 'https://twitter.com/starwars/status/665052190608723968',
 335         'info_dict': {
 336             'id': '665052190608723968',
 337             'display_id': '665052190608723968',
 338             'ext': 'mp4',
 339             'title': 'md5:e99588f17b3dd0503814ffb560e64731',
 340             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 341             'uploader_id': 'starwars',
 342             'uploader': r're:Star Wars.*',
 343             'timestamp': 1447395772,
 344             'upload_date': '20151113',
 345             'uploader_url': 'https://twitter.com/starwars',
 346             'comment_count': int,
 347             'repost_count': int,
 348             'like_count': int,
 349             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 350             'age_limit': 0,
 351         },
 352     }, {
 353         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 354         'info_dict': {
 355             'id': '705235433198714880',
 356             'ext': 'mp4',
 357             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 358             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 359             'uploader_id': 'BTNBrentYarina',
 360             'uploader': 'Brent Yarina',
 361             'timestamp': 1456976204,
 362             'upload_date': '20160303',
 363             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 364             'comment_count': int,
 365             'repost_count': int,
 366             'like_count': int,
 367             'tags': [],
 368             'age_limit': 0,
 369         },
 370         'params': {
 371             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 372             # Test case of TwitterCardIE
 373             'skip_download': True,
 374         },
 375         'skip': 'Dead external link',
 376     }, {
 377         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 378         'info_dict': {
 379             'id': '700207414000242688',
 380             'display_id': '700207533655363584',
 381             'ext': 'mp4',
 382             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 383             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 384             'thumbnail': r're:^https?://.*\.jpg',
 385             'uploader': 'jaydin donte geer',
 386             'uploader_id': 'jaydingeer',
 387             'duration': 30.0,
 388             'timestamp': 1455777459,
 389             'upload_date': '20160218',
 390             'uploader_url': 'https://twitter.com/jaydingeer',
 391             'comment_count': int,
 392             'repost_count': int,
 393             'like_count': int,
 394             'tags': ['Damndaniel'],
 395             'age_limit': 0,
 396         },
 397     }, {
 398         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 399         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 400         'info_dict': {
 401             'id': 'MIOxnrUteUd',
 402             'ext': 'mp4',
 403             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 404             'uploader': 'TAKUMA',
 405             'uploader_id': '1004126642786242560',
 406             'timestamp': 1402826626,
 407             'upload_date': '20140615',
 408             'thumbnail': r're:^https?://.*\.jpg',
 409             'alt_title': 'Vine by TAKUMA',
 410             'comment_count': int,
 411             'repost_count': int,
 412             'like_count': int,
 413             'view_count': int,
 414         },
 415         'add_ie': ['Vine'],
 416     }, {
 417         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 418         'info_dict': {
 419             'id': '717462543795523584',
 420             'display_id': '719944021058060289',
 421             'ext': 'mp4',
 422             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 423             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 424             'uploader_id': 'CaptainAmerica',
 425             'uploader': 'Captain America',
 426             'duration': 3.17,
 427             'timestamp': 1460483005,
 428             'upload_date': '20160412',
 429             'uploader_url': 'https://twitter.com/CaptainAmerica',
 430             'thumbnail': r're:^https?://.*\.jpg',
 431             'comment_count': int,
 432             'repost_count': int,
 433             'like_count': int,
 434             'tags': [],
 435             'age_limit': 0,
 436         },
 437     }, {
 438         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 439         'info_dict': {
 440             'id': '1zqKVVlkqLaKB',
 441             'ext': 'mp4',
 442             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 443             'upload_date': '20160923',
 444             'uploader_id': '1PmKqpJdOJQoY',
 445             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 446             'timestamp': 1474613214,
 447             'thumbnail': r're:^https?://.*\.jpg',
 448         },
 449         'add_ie': ['Periscope'],
 450     }, {
 451         # has mp4 formats via mobile API
 452         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 453         'info_dict': {
 454             'id': '852138619213144067',
 455             'ext': 'mp4',
 456             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 457             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 458             'uploader': 'عالم الأخبار',
 459             'uploader_id': 'news_al3alm',
 460             'duration': 277.4,
 461             'timestamp': 1492000653,
 462             'upload_date': '20170412',
 463         },
 464         'skip': 'Account suspended',
 465     }, {
 466         'url': 'https://twitter.com/i/web/status/910031516746514432',
 467         'info_dict': {
 468             'id': '910030238373089285',
 469             'display_id': '910031516746514432',
 470             'ext': 'mp4',
 471             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 472             'thumbnail': r're:^https?://.*\.jpg',
 473             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 474             'uploader': 'Préfet de Guadeloupe',
 475             'uploader_id': 'Prefet971',
 476             'duration': 47.48,
 477             'timestamp': 1505803395,
 478             'upload_date': '20170919',
 479             'uploader_url': 'https://twitter.com/Prefet971',
 480             'comment_count': int,
 481             'repost_count': int,
 482             'like_count': int,
 483             'tags': ['Maria'],
 484             'age_limit': 0,
 485         },
 486         'params': {
 487             'skip_download': True,  # requires ffmpeg
 488         },
 489     }, {
 490         # card via api.twitter.com/1.1/videos/tweet/config
 491         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 492         'info_dict': {
 493             'id': '1001551417340022785',
 494             'display_id': '1001551623938805763',
 495             'ext': 'mp4',
 496             'title': 're:.*?Shep is on a roll today.*?',
 497             'thumbnail': r're:^https?://.*\.jpg',
 498             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 499             'uploader': 'Lis Power',
 500             'uploader_id': 'LisPower1',
 501             'duration': 111.278,
 502             'timestamp': 1527623489,
 503             'upload_date': '20180529',
 504             'uploader_url': 'https://twitter.com/LisPower1',
 505             'comment_count': int,
 506             'repost_count': int,
 507             'like_count': int,
 508             'tags': [],
 509             'age_limit': 0,
 510         },
 511         'params': {
 512             'skip_download': True,  # requires ffmpeg
 513         },
 514     }, {
 515         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 516         'info_dict': {
 517             'id': '1087791272830607360',
 518             'display_id': '1087791357756956680',
 519             'ext': 'mp4',
 520             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 521             'thumbnail': r're:^https?://.*\.jpg',
 522             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 523             'uploader': 'Twitter',
 524             'uploader_id': 'Twitter',
 525             'duration': 61.567,
 526             'timestamp': 1548184644,
 527             'upload_date': '20190122',
 528             'uploader_url': 'https://twitter.com/Twitter',
 529             'comment_count': int,
 530             'repost_count': int,
 531             'like_count': int,
 532             'tags': [],
 533             'age_limit': 0,
 534         },
 535     }, {
 536         # not available in Periscope
 537         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 538         'info_dict': {
 539             'id': '1vOGwqejwoWxB',
 540             'ext': 'mp4',
 541             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 542             'uploader': 'Vivi',
 543             'uploader_id': '1eVjYOLGkGrQL',
 544             'thumbnail': r're:^https?://.*\.jpg',
 545             'tags': ['EduTECH2019'],
 546             'view_count': int,
 547         },
 548         'add_ie': ['TwitterBroadcast'],
 549     }, {
 550         # unified card
 551         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 552         'info_dict': {
 553             'id': '1349774757969989634',
 554             'display_id': '1349794411333394432',
 555             'ext': 'mp4',
 556             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 557             'thumbnail': r're:^https?://.*\.jpg',
 558             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 559             'uploader': 'Brooklyn Nets',
 560             'uploader_id': 'BrooklynNets',
 561             'duration': 324.484,
 562             'timestamp': 1610651040,
 563             'upload_date': '20210114',
 564             'uploader_url': 'https://twitter.com/BrooklynNets',
 565             'comment_count': int,
 566             'repost_count': int,
 567             'like_count': int,
 568             'tags': [],
 569             'age_limit': 0,
 570         },
 571         'params': {
 572             'skip_download': True,
 573         },
 574     }, {
 575         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 576         'info_dict': {
 577             'id': '1577855447914409984',
 578             'display_id': '1577855540407197696',
 579             'ext': 'mp4',
 580             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 581             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 582             'upload_date': '20221006',
 583             'uploader': 'oshtru',
 584             'uploader_id': 'oshtru',
 585             'uploader_url': 'https://twitter.com/oshtru',
 586             'thumbnail': r're:^https?://.*\.jpg',
 587             'duration': 30.03,
 588             'timestamp': 1665025050,
 589             'comment_count': int,
 590             'repost_count': int,
 591             'like_count': int,
 592             'tags': [],
 593             'age_limit': 0,
 594         },
 595         'params': {'skip_download': True},
 596     }, {
 597         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 598         'info_dict': {
 599             'id': '1577719286659006464',
 600             'title': 'Ultima | #\u0432\u029f\u043c - Test',
 601             'description': 'Test https://t.co/Y3KEZD7Dad',
 602             'uploader': 'Ultima | #\u0432\u029f\u043c',
 603             'uploader_id': 'UltimaShadowX',
 604             'uploader_url': 'https://twitter.com/UltimaShadowX',
 605             'upload_date': '20221005',
 606             'timestamp': 1664992565,
 607             'comment_count': int,
 608             'repost_count': int,
 609             'like_count': int,
 610             'tags': [],
 611             'age_limit': 0,
 612         },
 613         'playlist_count': 4,
 614         'params': {'skip_download': True},
 615     }, {
 616         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 617         'info_dict': {
 618             'id': '1575559336759263233',
 619             'display_id': '1575560063510810624',
 620             'ext': 'mp4',
 621             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 622             'thumbnail': r're:^https?://.*\.jpg',
 623             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 624             'uploader': 'Max Olson',
 625             'uploader_id': 'MesoMax919',
 626             'uploader_url': 'https://twitter.com/MesoMax919',
 627             'duration': 21.321,
 628             'timestamp': 1664477766,
 629             'upload_date': '20220929',
 630             'comment_count': int,
 631             'repost_count': int,
 632             'like_count': int,
 633             'tags': ['HurricaneIan'],
 634             'age_limit': 0,
 635         },
 636     }, {
 637         # Adult content, uses old token
 638         # Fails if not logged in (GraphQL)
 639         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 640         'info_dict': {
 641             'id': '1575199163847000068',
 642             'display_id': '1575199173472927762',
 643             'ext': 'mp4',
 644             'title': str,
 645             'description': str,
 646             'uploader': str,
 647             'uploader_id': 'Rizdraws',
 648             'uploader_url': 'https://twitter.com/Rizdraws',
 649             'upload_date': '20220928',
 650             'timestamp': 1664391723,
 651             'thumbnail': r're:^https?://.+\.jpg',
 652             'like_count': int,
 653             'repost_count': int,
 654             'comment_count': int,
 655             'age_limit': 18,
 656             'tags': []
 657         },
 658         'expected_warnings': ['404'],
 659     }, {
 660         # Description is missing one https://t.co url (GraphQL)
 661         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 662         'playlist_mincount': 2,
 663         'info_dict': {
 664             'id': '1395079556562706435',
 665             'title': str,
 666             'tags': [],
 667             'uploader': str,
 668             'like_count': int,
 669             'upload_date': '20210519',
 670             'age_limit': 0,
 671             'repost_count': int,
 672             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
 673             'uploader_id': 'Srirachachau',
 674             'comment_count': int,
 675             'uploader_url': 'https://twitter.com/Srirachachau',
 676             'timestamp': 1621447860,
 677         },
 678     }, {
 679         # Description is missing one https://t.co url (GraphQL)
 680         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 681         'playlist_mincount': 2,
 682         'info_dict': {
 683             'id': '1578353380363501568',
 684             'title': str,
 685             'uploader_id': 'DavidToons_',
 686             'repost_count': int,
 687             'like_count': int,
 688             'uploader': str,
 689             'timestamp': 1665143744,
 690             'uploader_url': 'https://twitter.com/DavidToons_',
 691             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
 692             'tags': [],
 693             'comment_count': int,
 694             'upload_date': '20221007',
 695             'age_limit': 0,
 696         },
 697     }, {
 698         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 699         'playlist_count': 2,
 700         'info_dict': {
 701             'id': '1578401165338976258',
 702             'title': str,
 703             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 704             'uploader': str,
 705             'uploader_id': 'primevideouk',
 706             'timestamp': 1665155137,
 707             'upload_date': '20221007',
 708             'age_limit': 0,
 709             'uploader_url': 'https://twitter.com/primevideouk',
 710             'comment_count': int,
 711             'repost_count': int,
 712             'like_count': int,
 713             'tags': ['TheRingsOfPower'],
 714         },
 715     }, {
 716         # Twitter Spaces
 717         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 718         'info_dict': {
 719             'id': '1lPJqmBeeNAJb',
 720             'ext': 'm4a',
 721             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 722             'uploader': r're:Monique Camarra.+?',
 723             'uploader_id': 'MoniqueCamarra',
 724             'live_status': 'was_live',
 725             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 726             'timestamp': 1658407771464,
 727         },
 728         'add_ie': ['TwitterSpaces'],
 729         'params': {'skip_download': 'm3u8'},
 730     }, {
 731         # URL specifies video number but --yes-playlist
 732         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 733         'playlist_mincount': 2,
 734         'info_dict': {
 735             'id': '1600649710662213632',
 736             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 737             'timestamp': 1670459604.0,
 738             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 739             'comment_count': int,
 740             'uploader_id': 'CTVJLaidlaw',
 741             'repost_count': int,
 742             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 743             'upload_date': '20221208',
 744             'age_limit': 0,
 745             'uploader': 'Jocelyn Laidlaw',
 746             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 747             'like_count': int,
 748         },
 749     }, {
 750         # URL specifies video number and --no-playlist
 751         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 752         'info_dict': {
 753             'id': '1600649511827013632',
 754             'ext': 'mp4',
 755             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 756             'thumbnail': r're:^https?://.+\.jpg',
 757             'timestamp': 1670459604.0,
 758             'uploader_id': 'CTVJLaidlaw',
 759             'uploader': 'Jocelyn Laidlaw',
 760             'repost_count': int,
 761             'comment_count': int,
 762             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 763             'duration': 102.226,
 764             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 765             'display_id': '1600649710662213632',
 766             'like_count': int,
 767             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 768             'upload_date': '20221208',
 769             'age_limit': 0,
 770         },
 771         'params': {'noplaylist': True},
 772     }, {
 773         # onion route
 774         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
 775         'only_matching': True,
 776     }, {
 777         # Twitch Clip Embed
 778         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 779         'only_matching': True,
 780     }, {
 781         # promo_video_website card
 782         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 783         'only_matching': True,
 784     }, {
 785         # promo_video_convo card
 786         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
 787         'only_matching': True,
 788     }, {
 789         # appplayer card
 790         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
 791         'only_matching': True,
 792     }, {
 793         # video_direct_message card
 794         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
 795         'only_matching': True,
 796     }, {
 797         # poll2choice_video card
 798         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
 799         'only_matching': True,
 800     }, {
 801         # poll3choice_video card
 802         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
 803         'only_matching': True,
 804     }, {
 805         # poll4choice_video card
 806         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
 807         'only_matching': True,
 808     }]
 809
 810     def _graphql_to_legacy(self, data, twid):
 811         result = traverse_obj(data, (
 812             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
 813             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
 814             'tweet_results', 'result'
 815         ), expected_type=dict, default={}, get_all=False)
 816
 817         if 'tombstone' in result:
 818             cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
 819             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
 820
 821         status = result.get('legacy', {})
 822         status.update(traverse_obj(result, {
 823             'user': ('core', 'user_results', 'result', 'legacy'),
 824             'card': ('card', 'legacy'),
 825             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
 826         }, expected_type=dict, default={}))
 827
 828         # extra transformation is needed since result does not match legacy format
 829         binding_values = {
 830             binding_value.get('key'): binding_value.get('value')
 831             for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
 832         }
 833         if binding_values:
 834             status['card']['binding_values'] = binding_values
 835
 836         return status
 837
 838     def _build_graphql_query(self, media_id):
 839         return {
 840             'variables': {
 841                 'focalTweetId': media_id,
 842                 'includePromotedContent': True,
 843                 'with_rux_injections': False,
 844                 'withBirdwatchNotes': True,
 845                 'withCommunity': True,
 846                 'withDownvotePerspective': False,
 847                 'withQuickPromoteEligibilityTweetFields': True,
 848                 'withReactionsMetadata': False,
 849                 'withReactionsPerspective': False,
 850                 'withSuperFollowsTweetFields': True,
 851                 'withSuperFollowsUserFields': True,
 852                 'withV2Timeline': True,
 853                 'withVoice': True,
 854             },
 855             'features': {
 856                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
 857                 'interactive_text_enabled': True,
 858                 'responsive_web_edit_tweet_api_enabled': True,
 859                 'responsive_web_enhance_cards_enabled': True,
 860                 'responsive_web_graphql_timeline_navigation_enabled': False,
 861                 'responsive_web_text_conversations_enabled': False,
 862                 'responsive_web_uc_gql_enabled': True,
 863                 'standardized_nudges_misinfo': True,
 864                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
 865                 'tweetypie_unmention_optimization_enabled': True,
 866                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
 867                 'verified_phone_label_enabled': False,
 868                 'vibe_api_enabled': True,
 869             },
 870         }
 871
 872     def _real_extract(self, url):
 873         twid, selected_index = self._match_valid_url(url).group('id', 'index')
 874         if self.is_logged_in or self._configuration_arg('force_graphql'):
 875             self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
 876             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
 877             status = self._graphql_to_legacy(result, twid)
 878
 879         else:
 880             status = self._call_api(f'statuses/show/{twid}.json', twid, {
 881                 'cards_platform': 'Web-12',
 882                 'include_cards': 1,
 883                 'include_reply_count': 1,
 884                 'include_user_entities': 0,
 885                 'tweet_mode': 'extended',
 886             })
 887
 888         title = description = status['full_text'].replace('\n', ' ')
 889         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
 890         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
 891         user = status.get('user') or {}
 892         uploader = user.get('name')
 893         if uploader:
 894             title = f'{uploader} - {title}'
 895         uploader_id = user.get('screen_name')
 896
 897         tags = []
 898         for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
 899             hashtag_text = hashtag.get('text')
 900             if not hashtag_text:
 901                 continue
 902             tags.append(hashtag_text)
 903
 904         info = {
 905             'id': twid,
 906             'title': title,
 907             'description': description,
 908             'uploader': uploader,
 909             'timestamp': unified_timestamp(status.get('created_at')),
 910             'uploader_id': uploader_id,
 911             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
 912             'like_count': int_or_none(status.get('favorite_count')),
 913             'repost_count': int_or_none(status.get('retweet_count')),
 914             'comment_count': int_or_none(status.get('reply_count')),
 915             'age_limit': 18 if status.get('possibly_sensitive') else 0,
 916             'tags': tags,
 917         }
 918
 919         def extract_from_video_info(media):
 920             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
 921             self.write_debug(f'Extracting from video info: {media_id}')
 922             video_info = media.get('video_info') or {}
 923
 924             formats = []
 925             subtitles = {}
 926             for variant in video_info.get('variants', []):
 927                 fmts, subs = self._extract_variant_formats(variant, twid)
 928                 subtitles = self._merge_subtitles(subtitles, subs)
 929                 formats.extend(fmts)
 930
 931             thumbnails = []
 932             media_url = media.get('media_url_https') or media.get('media_url')
 933             if media_url:
 934                 def add_thumbnail(name, size):
 935                     thumbnails.append({
 936                         'id': name,
 937                         'url': update_url_query(media_url, {'name': name}),
 938                         'width': int_or_none(size.get('w') or size.get('width')),
 939                         'height': int_or_none(size.get('h') or size.get('height')),
 940                     })
 941                 for name, size in media.get('sizes', {}).items():
 942                     add_thumbnail(name, size)
 943                 add_thumbnail('orig', media.get('original_info') or {})
 944
 945             return {
 946                 'id': media_id,
 947                 'formats': formats,
 948                 'subtitles': subtitles,
 949                 'thumbnails': thumbnails,
 950                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
 951                 # The codec of http formats are unknown
 952                 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
 953             }
 954
 955         def extract_from_card_info(card):
 956             if not card:
 957                 return
 958
 959             self.write_debug(f'Extracting from card info: {card.get("url")}')
 960             binding_values = card['binding_values']
 961
 962             def get_binding_value(k):
 963                 o = binding_values.get(k) or {}
 964                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
 965
 966             card_name = card['name'].split(':')[-1]
 967             if card_name == 'player':
 968                 yield {
 969                     '_type': 'url',
 970                     'url': get_binding_value('player_url'),
 971                 }
 972             elif card_name == 'periscope_broadcast':
 973                 yield {
 974                     '_type': 'url',
 975                     'url': get_binding_value('url') or get_binding_value('player_url'),
 976                     'ie_key': PeriscopeIE.ie_key(),
 977                 }
 978             elif card_name == 'broadcast':
 979                 yield {
 980                     '_type': 'url',
 981                     'url': get_binding_value('broadcast_url'),
 982                     'ie_key': TwitterBroadcastIE.ie_key(),
 983                 }
 984             elif card_name == 'audiospace':
 985                 yield {
 986                     '_type': 'url',
 987                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
 988                     'ie_key': TwitterSpacesIE.ie_key(),
 989                 }
 990             elif card_name == 'summary':
 991                 yield {
 992                     '_type': 'url',
 993                     'url': get_binding_value('card_url'),
 994                 }
 995             elif card_name == 'unified_card':
 996                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
 997                 yield from map(extract_from_video_info, traverse_obj(
 998                     unified_card, ('media_entities', ...), expected_type=dict))
 999             # amplify, promo_video_website, promo_video_convo, appplayer,
1000             # video_direct_message, poll2choice_video, poll3choice_video,
1001             # poll4choice_video, ...
1002             else:
1003                 is_amplify = card_name == 'amplify'
1004                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1005                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1006                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1007
1008                 thumbnails = []
1009                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1010                     image = get_binding_value('player_image' + suffix) or {}
1011                     image_url = image.get('url')
1012                     if not image_url or '/player-placeholder' in image_url:
1013                         continue
1014                     thumbnails.append({
1015                         'id': suffix[1:] if suffix else 'medium',
1016                         'url': image_url,
1017                         'width': int_or_none(image.get('width')),
1018                         'height': int_or_none(image.get('height')),
1019                     })
1020
1021                 yield {
1022                     'formats': formats,
1023                     'subtitles': subtitles,
1024                     'thumbnails': thumbnails,
1025                     'duration': int_or_none(get_binding_value(
1026                         'content_duration_seconds')),
1027                 }
1028
1029         media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
1030         videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
1031         cards = extract_from_card_info(status.get('card'))
1032         entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
1033
1034         if not entries:
1035             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1036             if not expanded_url or expanded_url == url:
1037                 raise ExtractorError('No video could be found in this tweet', expected=True)
1038
1039             return self.url_result(expanded_url, display_id=twid, **info)
1040
1041         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1042
1043         if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1044             index = int(selected_index) - 1
1045             if index >= len(entries):
1046                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1047
1048             return entries[index]
1049
1050         if len(entries) == 1:
1051             return entries[0]
1052
1053         for index, entry in enumerate(entries, 1):
1054             entry['title'] += f' #{index}'
1055
1056         return self.playlist_result(entries, **info)
1057
1058
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for Amplify video pages served from amp.twimg.com
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's ``twitter:*`` meta tags.

        Returns an info dict with ``id``, a fixed ``title``, ``formats`` and
        ``thumbnails``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the twitter:<target>:width / :height meta tags as integers (or None)
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # NOTE(review): _extract_formats_from_vmap_url is defined outside this block;
        # guard against it yielding no formats so that an empty list is returned
        # instead of failing with an IndexError here
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1114
1115
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for broadcasts addressed as <twitter host>/i/broadcasts/<13-char id>
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream status and build the HLS formats.

        Raises a geo-restriction error when the playback URL points at the
        ``geoblocked`` stream.
        """
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's "type" query parameter (if any) names the format group
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1152
1153
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for Twitter Spaces addressed as <twitter host>/i/spaces/<13-char id>
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased metadata 'state' value -> live_status reported for the Space
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for an AudioSpaceById query."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Extract the audio stream and metadata of a Twitter Space.

        Raises ExtractorError (expected) when the Space cannot be found. For a
        Space that has not started, or has ended but is not downloadable yet,
        ``raise_no_formats`` is used and no formats are extracted.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states yield None instead of raising
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # created_at is given in milliseconds, while 'timestamp' is expected in seconds.
            # NOTE(review): any other test that pins a Space timestamp in
            # milliseconds needs to be switched to seconds as well
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1246
1247
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links, given either as a URL or as "tco:<id>"
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" is matched
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and return a URL result for its final target.

        When the link lands on Twitter's unsafe-link warning page, the warning
        prefix is stripped so that the wrapped target URL is returned instead.
        """
        mobj = self._match_valid_url(url)
        eid, link_id = mobj.group('eid', 'id')
        if eid:
            # "tco:<id>" form: rebuild the real short URL from the id
            link_id = eid
            url = self._BASE_URL + link_id
        # NOTE(review): the curl User-Agent presumably makes t.co answer with a
        # plain HTTP redirect - confirm before changing it
        new_url = self._request_webpage(url, link_id, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading prefix; the target itself is left untouched
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)
1262             new_url = new_url.replace(__UNSAFE_LINK, "")
1263         return self.url_result(new_url)