yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from .periscope import PeriscopeBaseIE, PeriscopeIE
   7 from ..compat import functools  # isort: split
   8 from ..compat import (
   9     compat_parse_qs,
  10     compat_urllib_parse_unquote,
  11     compat_urllib_parse_urlparse,
  12 )
  13 from ..utils import (
  14     ExtractorError,
  15     dict_get,
  16     float_or_none,
  17     format_field,
  18     int_or_none,
  19     make_archive_id,
  20     str_or_none,
  21     strip_or_none,
  22     traverse_obj,
  23     try_call,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     xpath_text,
  29 )
  30
  31
  32 class TwitterBaseIE(InfoExtractor):
  33     _API_BASE = 'https://api.twitter.com/1.1/'
  34     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
  35     _TOKENS = {
  36         'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
  37         'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
  38     }
  39     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  40
  41     def _extract_variant_formats(self, variant, video_id):
  42         variant_url = variant.get('url')
  43         if not variant_url:
  44             return [], {}
  45         elif '.m3u8' in variant_url:
  46             return self._extract_m3u8_formats_and_subtitles(
  47                 variant_url, video_id, 'mp4', 'm3u8_native',
  48                 m3u8_id='hls', fatal=False)
  49         else:
  50             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
  51             f = {
  52                 'url': variant_url,
  53                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
  54                 'tbr': tbr,
  55             }
  56             self._search_dimensions_in_video_url(f, variant_url)
  57             return [f], {}
  58
  59     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
  60         vmap_url = url_or_none(vmap_url)
  61         if not vmap_url:
  62             return [], {}
  63         vmap_data = self._download_xml(vmap_url, video_id)
  64         formats = []
  65         subtitles = {}
  66         urls = []
  67         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
  68             video_variant.attrib['url'] = compat_urllib_parse_unquote(
  69                 video_variant.attrib['url'])
  70             urls.append(video_variant.attrib['url'])
  71             fmts, subs = self._extract_variant_formats(
  72                 video_variant.attrib, video_id)
  73             formats.extend(fmts)
  74             subtitles = self._merge_subtitles(subtitles, subs)
  75         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
  76         if video_url not in urls:
  77             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
  78             formats.extend(fmts)
  79             subtitles = self._merge_subtitles(subtitles, subs)
  80         return formats, subtitles
  81
  82     @staticmethod
  83     def _search_dimensions_in_video_url(a_format, video_url):
  84         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
  85         if m:
  86             a_format.update({
  87                 'width': int(m.group('width')),
  88                 'height': int(m.group('height')),
  89             })
  90
  91     @functools.cached_property
  92     def is_logged_in(self):
  93         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
  94
  95     def _call_api(self, path, video_id, query={}, graphql=False):
  96         cookies = self._get_cookies(self._API_BASE)
  97         headers = {}
  98
  99         csrf_cookie = cookies.get('ct0')
 100         if csrf_cookie:
 101             headers['x-csrf-token'] = csrf_cookie.value
 102
 103         if self.is_logged_in:
 104             headers.update({
 105                 'x-twitter-auth-type': 'OAuth2Session',
 106                 'x-twitter-client-language': 'en',
 107                 'x-twitter-active-user': 'yes',
 108             })
 109
 110         result, last_error = None, None
 111         for bearer_token in self._TOKENS:
 112             headers['Authorization'] = f'Bearer {bearer_token}'
 113
 114             if not self.is_logged_in:
 115                 if not self._TOKENS[bearer_token]:
 116                     headers.pop('x-guest-token', None)
 117                     guest_token_response = self._download_json(
 118                         self._API_BASE + 'guest/activate.json', video_id,
 119                         'Downloading guest token', data=b'', headers=headers)
 120
 121                     self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
 122                     if not self._TOKENS[bearer_token]:
 123                         raise ExtractorError('Could not retrieve guest token')
 124                 headers['x-guest-token'] = self._TOKENS[bearer_token]
 125
 126             try:
 127                 allowed_status = {400, 403, 404} if graphql else {403}
 128                 result = self._download_json(
 129                     (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 130                     video_id, headers=headers, query=query, expected_status=allowed_status)
 131                 break
 132
 133             except ExtractorError as e:
 134                 if last_error:
 135                     raise last_error
 136                 elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
 137                     raise
 138                 last_error = e
 139                 self.report_warning(
 140                     'Twitter API gave 404 response, retrying with deprecated token. '
 141                     'Only one media item can be extracted')
 142
 143         if result.get('errors'):
 144             error_message = ', '.join(set(traverse_obj(
 145                 result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error'
 146             raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True)
 147
 148         assert result is not None
 149         return result
 150
 151     def _build_graphql_query(self, media_id):
 152         raise NotImplementedError('Method must be implemented to support GraphQL')
 153
 154     def _call_graphql_api(self, endpoint, media_id):
 155         data = self._build_graphql_query(media_id)
 156         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 157         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 158
 159
 160 class TwitterCardIE(InfoExtractor):
 161     IE_NAME = 'twitter:card'
 162     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 163     _TESTS = [
 164         {
 165             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 166             # MD5 checksums are different in different places
 167             'info_dict': {
 168                 'id': '560070131976392705',
 169                 'ext': 'mp4',
 170                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 171                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 172                 'uploader': 'Twitter',
 173                 'uploader_id': 'Twitter',
 174                 'thumbnail': r're:^https?://.*\.jpg',
 175                 'duration': 30.033,
 176                 'timestamp': 1422366112,
 177                 'upload_date': '20150127',
 178                 'age_limit': 0,
 179                 'comment_count': int,
 180                 'tags': [],
 181                 'repost_count': int,
 182                 'like_count': int,
 183                 'display_id': '560070183650213889',
 184                 'uploader_url': 'https://twitter.com/Twitter',
 185             },
 186         },
 187         {
 188             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 189             'md5': '7137eca597f72b9abbe61e5ae0161399',
 190             'info_dict': {
 191                 'id': '623160978427936768',
 192                 'ext': 'mp4',
 193                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 194                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 195                 'uploader': 'NASA',
 196                 'uploader_id': 'NASA',
 197                 'timestamp': 1437408129,
 198                 'upload_date': '20150720',
 199                 'uploader_url': 'https://twitter.com/NASA',
 200                 'age_limit': 0,
 201                 'comment_count': int,
 202                 'like_count': int,
 203                 'repost_count': int,
 204                 'tags': ['PlutoFlyby'],
 205             },
 206             'params': {'format': '[protocol=https]'}
 207         },
 208         {
 209             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 210             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 211             'info_dict': {
 212                 'id': 'dq4Oj5quskI',
 213                 'ext': 'mp4',
 214                 'title': 'Ubuntu 11.10 Overview',
 215                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 216                 'upload_date': '20111013',
 217                 'uploader': 'OMG! UBUNTU!',
 218                 'uploader_id': 'omgubuntu',
 219                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 220                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 221                 'channel_follower_count': int,
 222                 'chapters': 'count:8',
 223                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 224                 'duration': 138,
 225                 'categories': ['Film & Animation'],
 226                 'age_limit': 0,
 227                 'comment_count': int,
 228                 'availability': 'public',
 229                 'like_count': int,
 230                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 231                 'view_count': int,
 232                 'tags': 'count:12',
 233                 'channel': 'OMG! UBUNTU!',
 234                 'playable_in_embed': True,
 235             },
 236             'add_ie': ['Youtube'],
 237         },
 238         {
 239             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 240             'info_dict': {
 241                 'id': 'iBb2x00UVlv',
 242                 'ext': 'mp4',
 243                 'upload_date': '20151113',
 244                 'uploader_id': '1189339351084113920',
 245                 'uploader': 'ArsenalTerje',
 246                 'title': 'Vine by ArsenalTerje',
 247                 'timestamp': 1447451307,
 248                 'alt_title': 'Vine by ArsenalTerje',
 249                 'comment_count': int,
 250                 'like_count': int,
 251                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 252                 'view_count': int,
 253                 'repost_count': int,
 254             },
 255             'add_ie': ['Vine'],
 256             'params': {'skip_download': 'm3u8'},
 257         },
 258         {
 259             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 260             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 261             'info_dict': {
 262                 'id': '705235433198714880',
 263                 'ext': 'mp4',
 264                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 265                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 266                 'uploader': 'Brent Yarina',
 267                 'uploader_id': 'BTNBrentYarina',
 268                 'timestamp': 1456976204,
 269                 'upload_date': '20160303',
 270             },
 271             'skip': 'This content is no longer available.',
 272         },
 273         {
 274             'url': 'https://twitter.com/i/videos/752274308186120192',
 275             'only_matching': True,
 276         },
 277     ]
 278
 279     def _real_extract(self, url):
 280         status_id = self._match_id(url)
 281         return self.url_result(
 282             'https://twitter.com/statuses/' + status_id,
 283             TwitterIE.ie_key(), status_id)
 284
 285
 286 class TwitterIE(TwitterBaseIE):
 287     IE_NAME = 'twitter'
 288     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
 289
 290     _TESTS = [{
 291         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 292         'info_dict': {
 293             'id': '643211870443208704',
 294             'display_id': '643211948184596480',
 295             'ext': 'mp4',
 296             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 297             'thumbnail': r're:^https?://.*\.jpg',
 298             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 299             'uploader': 'FREE THE NIPPLE',
 300             'uploader_id': 'freethenipple',
 301             'duration': 12.922,
 302             'timestamp': 1442188653,
 303             'upload_date': '20150913',
 304             'uploader_url': 'https://twitter.com/freethenipple',
 305             'comment_count': int,
 306             'repost_count': int,
 307             'like_count': int,
 308             'tags': [],
 309             'age_limit': 18,
 310         },
 311     }, {
 312         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 313         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 314         'info_dict': {
 315             'id': '657991469417025536',
 316             'ext': 'mp4',
 317             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 318             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 319             'thumbnail': r're:^https?://.*\.png',
 320             'uploader': 'Gifs',
 321             'uploader_id': 'giphz',
 322         },
 323         'expected_warnings': ['height', 'width'],
 324         'skip': 'Account suspended',
 325     }, {
 326         'url': 'https://twitter.com/starwars/status/665052190608723968',
 327         'info_dict': {
 328             'id': '665052190608723968',
 329             'display_id': '665052190608723968',
 330             'ext': 'mp4',
 331             'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8',
 332             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 333             'uploader_id': 'starwars',
 334             'uploader': r're:Star Wars.*',
 335             'timestamp': 1447395772,
 336             'upload_date': '20151113',
 337             'uploader_url': 'https://twitter.com/starwars',
 338             'comment_count': int,
 339             'repost_count': int,
 340             'like_count': int,
 341             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 342             'age_limit': 0,
 343         },
 344     }, {
 345         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 346         'info_dict': {
 347             'id': '705235433198714880',
 348             'ext': 'mp4',
 349             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 350             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 351             'uploader_id': 'BTNBrentYarina',
 352             'uploader': 'Brent Yarina',
 353             'timestamp': 1456976204,
 354             'upload_date': '20160303',
 355             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 356             'comment_count': int,
 357             'repost_count': int,
 358             'like_count': int,
 359             'tags': [],
 360             'age_limit': 0,
 361         },
 362         'params': {
 363             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 364             # Test case of TwitterCardIE
 365             'skip_download': True,
 366         },
 367     }, {
 368         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 369         'info_dict': {
 370             'id': '700207414000242688',
 371             'display_id': '700207533655363584',
 372             'ext': 'mp4',
 373             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 374             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 375             'thumbnail': r're:^https?://.*\.jpg',
 376             'uploader': 'jaydin donte geer',
 377             'uploader_id': 'jaydingeer',
 378             'duration': 30.0,
 379             'timestamp': 1455777459,
 380             'upload_date': '20160218',
 381             'uploader_url': 'https://twitter.com/jaydingeer',
 382             'comment_count': int,
 383             'repost_count': int,
 384             'like_count': int,
 385             'tags': ['Damndaniel'],
 386             'age_limit': 0,
 387         },
 388     }, {
 389         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 390         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 391         'info_dict': {
 392             'id': 'MIOxnrUteUd',
 393             'ext': 'mp4',
 394             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 395             'uploader': 'TAKUMA',
 396             'uploader_id': '1004126642786242560',
 397             'timestamp': 1402826626,
 398             'upload_date': '20140615',
 399             'thumbnail': r're:^https?://.*\.jpg',
 400             'alt_title': 'Vine by TAKUMA',
 401             'comment_count': int,
 402             'repost_count': int,
 403             'like_count': int,
 404             'view_count': int,
 405         },
 406         'add_ie': ['Vine'],
 407     }, {
 408         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 409         'info_dict': {
 410             'id': '717462543795523584',
 411             'display_id': '719944021058060289',
 412             'ext': 'mp4',
 413             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 414             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 415             'uploader_id': 'CaptainAmerica',
 416             'uploader': 'Captain America',
 417             'duration': 3.17,
 418             'timestamp': 1460483005,
 419             'upload_date': '20160412',
 420             'uploader_url': 'https://twitter.com/CaptainAmerica',
 421             'thumbnail': r're:^https?://.*\.jpg',
 422             'comment_count': int,
 423             'repost_count': int,
 424             'like_count': int,
 425             'tags': [],
 426             'age_limit': 0,
 427         },
 428     }, {
 429         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 430         'info_dict': {
 431             'id': '1zqKVVlkqLaKB',
 432             'ext': 'mp4',
 433             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 434             'upload_date': '20160923',
 435             'uploader_id': '1PmKqpJdOJQoY',
 436             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 437             'timestamp': 1474613214,
 438             'thumbnail': r're:^https?://.*\.jpg',
 439         },
 440         'add_ie': ['Periscope'],
 441     }, {
 442         # has mp4 formats via mobile API
 443         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 444         'info_dict': {
 445             'id': '852138619213144067',
 446             'ext': 'mp4',
 447             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 448             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 449             'uploader': 'عالم الأخبار',
 450             'uploader_id': 'news_al3alm',
 451             'duration': 277.4,
 452             'timestamp': 1492000653,
 453             'upload_date': '20170412',
 454         },
 455         'skip': 'Account suspended',
 456     }, {
 457         'url': 'https://twitter.com/i/web/status/910031516746514432',
 458         'info_dict': {
 459             'id': '910030238373089285',
 460             'display_id': '910031516746514432',
 461             'ext': 'mp4',
 462             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 463             'thumbnail': r're:^https?://.*\.jpg',
 464             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 465             'uploader': 'Préfet de Guadeloupe',
 466             'uploader_id': 'Prefet971',
 467             'duration': 47.48,
 468             'timestamp': 1505803395,
 469             'upload_date': '20170919',
 470             'uploader_url': 'https://twitter.com/Prefet971',
 471             'comment_count': int,
 472             'repost_count': int,
 473             'like_count': int,
 474             'tags': ['Maria'],
 475             'age_limit': 0,
 476         },
 477         'params': {
 478             'skip_download': True,  # requires ffmpeg
 479         },
 480     }, {
 481         # card via api.twitter.com/1.1/videos/tweet/config
 482         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 483         'info_dict': {
 484             'id': '1001551417340022785',
 485             'display_id': '1001551623938805763',
 486             'ext': 'mp4',
 487             'title': 're:.*?Shep is on a roll today.*?',
 488             'thumbnail': r're:^https?://.*\.jpg',
 489             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 490             'uploader': 'Lis Power',
 491             'uploader_id': 'LisPower1',
 492             'duration': 111.278,
 493             'timestamp': 1527623489,
 494             'upload_date': '20180529',
 495             'uploader_url': 'https://twitter.com/LisPower1',
 496             'comment_count': int,
 497             'repost_count': int,
 498             'like_count': int,
 499             'tags': [],
 500             'age_limit': 0,
 501         },
 502         'params': {
 503             'skip_download': True,  # requires ffmpeg
 504         },
 505     }, {
 506         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 507         'info_dict': {
 508             'id': '1087791272830607360',
 509             'display_id': '1087791357756956680',
 510             'ext': 'mp4',
 511             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 512             'thumbnail': r're:^https?://.*\.jpg',
 513             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 514             'uploader': 'Twitter',
 515             'uploader_id': 'Twitter',
 516             'duration': 61.567,
 517             'timestamp': 1548184644,
 518             'upload_date': '20190122',
 519             'uploader_url': 'https://twitter.com/Twitter',
 520             'comment_count': int,
 521             'repost_count': int,
 522             'like_count': int,
 523             'tags': [],
 524             'age_limit': 0,
 525         },
 526     }, {
 527         # not available in Periscope
 528         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 529         'info_dict': {
 530             'id': '1vOGwqejwoWxB',
 531             'ext': 'mp4',
 532             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 533             'uploader': 'Vivi',
 534             'uploader_id': '1eVjYOLGkGrQL',
 535             'thumbnail': r're:^https?://.*\.jpg',
 536             'tags': ['EduTECH2019'],
 537             'view_count': int,
 538         },
 539         'add_ie': ['TwitterBroadcast'],
 540     }, {
 541         # unified card
 542         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 543         'info_dict': {
 544             'id': '1349774757969989634',
 545             'display_id': '1349794411333394432',
 546             'ext': 'mp4',
 547             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 548             'thumbnail': r're:^https?://.*\.jpg',
 549             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 550             'uploader': 'Brooklyn Nets',
 551             'uploader_id': 'BrooklynNets',
 552             'duration': 324.484,
 553             'timestamp': 1610651040,
 554             'upload_date': '20210114',
 555             'uploader_url': 'https://twitter.com/BrooklynNets',
 556             'comment_count': int,
 557             'repost_count': int,
 558             'like_count': int,
 559             'tags': [],
 560             'age_limit': 0,
 561         },
 562         'params': {
 563             'skip_download': True,
 564         },
 565     }, {
 566         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 567         'info_dict': {
 568             'id': '1577855447914409984',
 569             'display_id': '1577855540407197696',
 570             'ext': 'mp4',
 571             'title': 'oshtru \U0001faac\U0001f47d - gm \u2728\ufe0f now I can post image and video. nice update.',
 572             'description': 'gm \u2728\ufe0f now I can post image and video. nice update. https://t.co/cG7XgiINOm',
 573             'upload_date': '20221006',
 574             'uploader': 'oshtru \U0001faac\U0001f47d',
 575             'uploader_id': 'oshtru',
 576             'uploader_url': 'https://twitter.com/oshtru',
 577             'thumbnail': r're:^https?://.*\.jpg',
 578             'duration': 30.03,
 579             'timestamp': 1665025050,
 580             'comment_count': int,
 581             'repost_count': int,
 582             'like_count': int,
 583             'tags': [],
 584             'age_limit': 0,
 585         },
 586         'params': {'skip_download': True},
 587     }, {
 588         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 589         'info_dict': {
 590             'id': '1577719286659006464',
 591             'title': 'Ultima | #\u0432\u029f\u043c - Test',
 592             'description': 'Test https://t.co/Y3KEZD7Dad',
 593             'uploader': 'Ultima | #\u0432\u029f\u043c',
 594             'uploader_id': 'UltimaShadowX',
 595             'uploader_url': 'https://twitter.com/UltimaShadowX',
 596             'upload_date': '20221005',
 597             'timestamp': 1664992565,
 598             'comment_count': int,
 599             'repost_count': int,
 600             'like_count': int,
 601             'tags': [],
 602             'age_limit': 0,
 603         },
 604         'playlist_count': 4,
 605         'params': {'skip_download': True},
 606     }, {
 607         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 608         'info_dict': {
 609             'id': '1575559336759263233',
 610             'display_id': '1575560063510810624',
 611             'ext': 'mp4',
 612             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 613             'thumbnail': r're:^https?://.*\.jpg',
 614             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 615             'uploader': 'Max Olson',
 616             'uploader_id': 'MesoMax919',
 617             'uploader_url': 'https://twitter.com/MesoMax919',
 618             'duration': 21.321,
 619             'timestamp': 1664477766,
 620             'upload_date': '20220929',
 621             'comment_count': int,
 622             'repost_count': int,
 623             'like_count': int,
 624             'tags': ['HurricaneIan'],
 625             'age_limit': 0,
 626         },
 627     }, {
 628         # Adult content, uses old token
 629         # Fails if not logged in (GraphQL)
 630         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 631         'info_dict': {
 632             'id': '1575199163847000068',
 633             'display_id': '1575199173472927762',
 634             'ext': 'mp4',
 635             'title': str,
 636             'description': str,
 637             'uploader': str,
 638             'uploader_id': 'Rizdraws',
 639             'uploader_url': 'https://twitter.com/Rizdraws',
 640             'upload_date': '20220928',
 641             'timestamp': 1664391723,
 642             'thumbnail': 're:^https?://.*\\.jpg',
 643             'like_count': int,
 644             'repost_count': int,
 645             'comment_count': int,
 646             'age_limit': 18,
 647             'tags': []
 648         },
 649         'expected_warnings': ['404'],
 650     }, {
 651         # Description is missing one https://t.co url (GraphQL)
 652         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 653         'playlist_mincount': 2,
 654         'info_dict': {
 655             'id': '1395079556562706435',
 656             'title': str,
 657             'tags': [],
 658             'uploader': str,
 659             'like_count': int,
 660             'upload_date': '20210519',
 661             'age_limit': 0,
 662             'repost_count': int,
 663             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
 664             'uploader_id': 'Srirachachau',
 665             'comment_count': int,
 666             'uploader_url': 'https://twitter.com/Srirachachau',
 667             'timestamp': 1621447860,
 668         },
 669     }, {
 670         # Description is missing one https://t.co url (GraphQL)
 671         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 672         'playlist_mincount': 2,
 673         'info_dict': {
 674             'id': '1578353380363501568',
 675             'title': str,
 676             'uploader_id': 'DavidToons_',
 677             'repost_count': int,
 678             'like_count': int,
 679             'uploader': str,
 680             'timestamp': 1665143744,
 681             'uploader_url': 'https://twitter.com/DavidToons_',
 682             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
 683             'tags': [],
 684             'comment_count': int,
 685             'upload_date': '20221007',
 686             'age_limit': 0,
 687         },
 688     }, {
 689         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 690         'playlist_count': 2,
 691         'info_dict': {
 692             'id': '1578401165338976258',
 693             'title': str,
 694             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 695             'uploader': str,
 696             'uploader_id': 'primevideouk',
 697             'timestamp': 1665155137,
 698             'upload_date': '20221007',
 699             'age_limit': 0,
 700             'uploader_url': 'https://twitter.com/primevideouk',
 701             'comment_count': int,
 702             'repost_count': int,
 703             'like_count': int,
 704             'tags': ['TheRingsOfPower'],
 705         },
 706     }, {
 707         # Twitter Spaces
 708         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 709         'info_dict': {
 710             'id': '1lPJqmBeeNAJb',
 711             'ext': 'm4a',
 712             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 713             'uploader': r're:Monique Camarra.+?',
 714             'uploader_id': 'MoniqueCamarra',
 715             'live_status': 'was_live',
 716             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 717             'timestamp': 1658407771464,
 718         },
 719         'add_ie': ['TwitterSpaces'],
 720         'params': {'skip_download': 'm3u8'},
 721     }, {
 722         # onion route
 723         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
 724         'only_matching': True,
 725     }, {
 726         # Twitch Clip Embed
 727         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 728         'only_matching': True,
 729     }, {
 730         # promo_video_website card
 731         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 732         'only_matching': True,
 733     }, {
 734         # promo_video_convo card
 735         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
 736         'only_matching': True,
 737     }, {
 738         # appplayer card
 739         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
 740         'only_matching': True,
 741     }, {
 742         # video_direct_message card
 743         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
 744         'only_matching': True,
 745     }, {
 746         # poll2choice_video card
 747         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
 748         'only_matching': True,
 749     }, {
 750         # poll3choice_video card
 751         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
 752         'only_matching': True,
 753     }, {
 754         # poll4choice_video card
 755         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
 756         'only_matching': True,
 757     }]
 758
 759     def _graphql_to_legacy(self, data, twid):
 760         result = traverse_obj(data, (
 761             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
 762             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
 763             'tweet_results', 'result'
 764         ), expected_type=dict, default={}, get_all=False)
 765
 766         if 'tombstone' in result:
 767             cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
 768             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
 769
 770         status = result.get('legacy', {})
 771         status.update(traverse_obj(result, {
 772             'user': ('core', 'user_results', 'result', 'legacy'),
 773             'card': ('card', 'legacy'),
 774             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
 775         }, expected_type=dict, default={}))
 776
 777         # extra transformation is needed since result does not match legacy format
 778         binding_values = {
 779             binding_value.get('key'): binding_value.get('value')
 780             for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
 781         }
 782         if binding_values:
 783             status['card']['binding_values'] = binding_values
 784
 785         return status
 786
 787     def _build_graphql_query(self, media_id):
 788         return {
 789             'variables': {
 790                 'focalTweetId': media_id,
 791                 'includePromotedContent': True,
 792                 'with_rux_injections': False,
 793                 'withBirdwatchNotes': True,
 794                 'withCommunity': True,
 795                 'withDownvotePerspective': False,
 796                 'withQuickPromoteEligibilityTweetFields': True,
 797                 'withReactionsMetadata': False,
 798                 'withReactionsPerspective': False,
 799                 'withSuperFollowsTweetFields': True,
 800                 'withSuperFollowsUserFields': True,
 801                 'withV2Timeline': True,
 802                 'withVoice': True,
 803             },
 804             'features': {
 805                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
 806                 'interactive_text_enabled': True,
 807                 'responsive_web_edit_tweet_api_enabled': True,
 808                 'responsive_web_enhance_cards_enabled': True,
 809                 'responsive_web_graphql_timeline_navigation_enabled': False,
 810                 'responsive_web_text_conversations_enabled': False,
 811                 'responsive_web_uc_gql_enabled': True,
 812                 'standardized_nudges_misinfo': True,
 813                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
 814                 'tweetypie_unmention_optimization_enabled': True,
 815                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
 816                 'verified_phone_label_enabled': False,
 817                 'vibe_api_enabled': True,
 818             },
 819         }
 820
 821     def _real_extract(self, url):
 822         twid = self._match_id(url)
 823         if self.is_logged_in or self._configuration_arg('force_graphql'):
 824             self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
 825             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
 826             status = self._graphql_to_legacy(result, twid)
 827
 828         else:
 829             status = self._call_api(f'statuses/show/{twid}.json', twid, {
 830                 'cards_platform': 'Web-12',
 831                 'include_cards': 1,
 832                 'include_reply_count': 1,
 833                 'include_user_entities': 0,
 834                 'tweet_mode': 'extended',
 835             })
 836
 837         title = description = status['full_text'].replace('\n', ' ')
 838         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
 839         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
 840         user = status.get('user') or {}
 841         uploader = user.get('name')
 842         if uploader:
 843             title = f'{uploader} - {title}'
 844         uploader_id = user.get('screen_name')
 845
 846         tags = []
 847         for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
 848             hashtag_text = hashtag.get('text')
 849             if not hashtag_text:
 850                 continue
 851             tags.append(hashtag_text)
 852
 853         info = {
 854             'id': twid,
 855             'title': title,
 856             'description': description,
 857             'uploader': uploader,
 858             'timestamp': unified_timestamp(status.get('created_at')),
 859             'uploader_id': uploader_id,
 860             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
 861             'like_count': int_or_none(status.get('favorite_count')),
 862             'repost_count': int_or_none(status.get('retweet_count')),
 863             'comment_count': int_or_none(status.get('reply_count')),
 864             'age_limit': 18 if status.get('possibly_sensitive') else 0,
 865             'tags': tags,
 866         }
 867
 868         def extract_from_video_info(media):
 869             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
 870             self.write_debug(f'Extracting from video info: {media_id}')
 871             video_info = media.get('video_info') or {}
 872
 873             formats = []
 874             subtitles = {}
 875             for variant in video_info.get('variants', []):
 876                 fmts, subs = self._extract_variant_formats(variant, twid)
 877                 subtitles = self._merge_subtitles(subtitles, subs)
 878                 formats.extend(fmts)
 879
 880             thumbnails = []
 881             media_url = media.get('media_url_https') or media.get('media_url')
 882             if media_url:
 883                 def add_thumbnail(name, size):
 884                     thumbnails.append({
 885                         'id': name,
 886                         'url': update_url_query(media_url, {'name': name}),
 887                         'width': int_or_none(size.get('w') or size.get('width')),
 888                         'height': int_or_none(size.get('h') or size.get('height')),
 889                     })
 890                 for name, size in media.get('sizes', {}).items():
 891                     add_thumbnail(name, size)
 892                 add_thumbnail('orig', media.get('original_info') or {})
 893
 894             return {
 895                 'id': media_id,
 896                 'formats': formats,
 897                 'subtitles': subtitles,
 898                 'thumbnails': thumbnails,
 899                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
 900                 # The codec of http formats are unknown
 901                 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
 902             }
 903
 904         def extract_from_card_info(card):
 905             if not card:
 906                 return
 907
 908             self.write_debug(f'Extracting from card info: {card.get("url")}')
 909             binding_values = card['binding_values']
 910
 911             def get_binding_value(k):
 912                 o = binding_values.get(k) or {}
 913                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
 914
 915             card_name = card['name'].split(':')[-1]
 916             if card_name == 'player':
 917                 yield {
 918                     '_type': 'url',
 919                     'url': get_binding_value('player_url'),
 920                 }
 921             elif card_name == 'periscope_broadcast':
 922                 yield {
 923                     '_type': 'url',
 924                     'url': get_binding_value('url') or get_binding_value('player_url'),
 925                     'ie_key': PeriscopeIE.ie_key(),
 926                 }
 927             elif card_name == 'broadcast':
 928                 yield {
 929                     '_type': 'url',
 930                     'url': get_binding_value('broadcast_url'),
 931                     'ie_key': TwitterBroadcastIE.ie_key(),
 932                 }
 933             elif card_name == 'audiospace':
 934                 yield {
 935                     '_type': 'url',
 936                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
 937                     'ie_key': TwitterSpacesIE.ie_key(),
 938                 }
 939             elif card_name == 'summary':
 940                 yield {
 941                     '_type': 'url',
 942                     'url': get_binding_value('card_url'),
 943                 }
 944             elif card_name == 'unified_card':
 945                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
 946                 yield from map(extract_from_video_info, traverse_obj(
 947                     unified_card, ('media_entities', ...), expected_type=dict))
 948             # amplify, promo_video_website, promo_video_convo, appplayer,
 949             # video_direct_message, poll2choice_video, poll3choice_video,
 950             # poll4choice_video, ...
 951             else:
 952                 is_amplify = card_name == 'amplify'
 953                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
 954                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
 955                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
 956
 957                 thumbnails = []
 958                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
 959                     image = get_binding_value('player_image' + suffix) or {}
 960                     image_url = image.get('url')
 961                     if not image_url or '/player-placeholder' in image_url:
 962                         continue
 963                     thumbnails.append({
 964                         'id': suffix[1:] if suffix else 'medium',
 965                         'url': image_url,
 966                         'width': int_or_none(image.get('width')),
 967                         'height': int_or_none(image.get('height')),
 968                     })
 969
 970                 yield {
 971                     'formats': formats,
 972                     'subtitles': subtitles,
 973                     'thumbnails': thumbnails,
 974                     'duration': int_or_none(get_binding_value(
 975                         'content_duration_seconds')),
 976                 }
 977
 978         media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
 979         videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
 980         cards = extract_from_card_info(status.get('card'))
 981         entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
 982
 983         if not entries:
 984             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
 985             if not expanded_url or expanded_url == url:
 986                 raise ExtractorError('No video could be found in this tweet', expected=True)
 987
 988             return self.url_result(expanded_url, display_id=twid, **info)
 989
 990         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
 991
 992         if len(entries) == 1:
 993             return entries[0]
 994
 995         for index, entry in enumerate(entries, 1):
 996             entry['title'] += f' #{index}'
 997
 998         return self.playlist_result(entries, **info)
 999
1000
class TwitterAmplifyIE(TwitterBaseIE):
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract an amp.twimg.com video from the meta tags of its web page.

        Formats come from the vmap URL in the ``twitter:amplify:vmap`` meta
        tag; the thumbnail and the player dimensions are read from the
        ``twitter:image``/``twitter:player`` meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        def _find_dimension(target):
            # Width/height are published as 'twitter:<target>:width|height' meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Only annotate the first format when there is one: indexing an empty
        # format list would raise an IndexError here
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1056
1057
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Extract a Twitter broadcast: metadata from ``broadcasts/show.json``, formats from its HLS stream."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        # The stream location is looked up by the broadcast's media key
        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL serves as the m3u8 id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1094
1095
class TwitterSpacesIE(TwitterBaseIE):
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
    _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased ``state`` of a Space's metadata to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload of the AudioSpaceById query for ``space_id``."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Extract a Twitter Space as an audio-only entry.

        Spaces that have not started yet, or that have ended without being
        downloadable, are reported through ``raise_no_formats``; metadata is
        still returned for them with an empty format list.

        Raises ExtractorError (expected) when the API returns no Space data.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live')
            # A Space is audio only
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # 'created_at' is a millisecond epoch, while 'timestamp' is expected in
            # seconds; any test still expecting the 13-digit value needs updating
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1188
1189
class TwitterShortenerIE(TwitterBaseIE):
    IE_NAME = 'twitter:shortener'
    # The dot of the host name is escaped so that only a literal "t.co" matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Resolve a t.co short link (or a ``tco:<id>`` reference) and return a URL result for its target."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # 'tco:<id>' form: rebuild the real short URL from the bare id
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the 'curl' User-Agent presumably makes t.co answer with a
        # plain HTTP redirect - confirm
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Skip the interstitial warning page and keep only the wrapped target
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)