yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from .periscope import PeriscopeBaseIE, PeriscopeIE
   7 from ..compat import functools  # isort: split
   8 from ..compat import (
   9     compat_parse_qs,
  10     compat_urllib_parse_unquote,
  11     compat_urllib_parse_urlparse,
  12 )
  13 from ..utils import (
  14     ExtractorError,
  15     dict_get,
  16     float_or_none,
  17     format_field,
  18     int_or_none,
  19     make_archive_id,
  20     str_or_none,
  21     strip_or_none,
  22     traverse_obj,
  23     try_call,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     xpath_text,
  29 )
  30
  31
  32 class TwitterBaseIE(InfoExtractor):
  33     _API_BASE = 'https://api.twitter.com/1.1/'
  34     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
  35     _TOKENS = {
  36         'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
  37         'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
  38     }
  39     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  40
  41     def _extract_variant_formats(self, variant, video_id):
  42         variant_url = variant.get('url')
  43         if not variant_url:
  44             return [], {}
  45         elif '.m3u8' in variant_url:
  46             return self._extract_m3u8_formats_and_subtitles(
  47                 variant_url, video_id, 'mp4', 'm3u8_native',
  48                 m3u8_id='hls', fatal=False)
  49         else:
  50             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
  51             f = {
  52                 'url': variant_url,
  53                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
  54                 'tbr': tbr,
  55             }
  56             self._search_dimensions_in_video_url(f, variant_url)
  57             return [f], {}
  58
  59     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
  60         vmap_url = url_or_none(vmap_url)
  61         if not vmap_url:
  62             return [], {}
  63         vmap_data = self._download_xml(vmap_url, video_id)
  64         formats = []
  65         subtitles = {}
  66         urls = []
  67         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
  68             video_variant.attrib['url'] = compat_urllib_parse_unquote(
  69                 video_variant.attrib['url'])
  70             urls.append(video_variant.attrib['url'])
  71             fmts, subs = self._extract_variant_formats(
  72                 video_variant.attrib, video_id)
  73             formats.extend(fmts)
  74             subtitles = self._merge_subtitles(subtitles, subs)
  75         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
  76         if video_url not in urls:
  77             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
  78             formats.extend(fmts)
  79             subtitles = self._merge_subtitles(subtitles, subs)
  80         return formats, subtitles
  81
  82     @staticmethod
  83     def _search_dimensions_in_video_url(a_format, video_url):
  84         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
  85         if m:
  86             a_format.update({
  87                 'width': int(m.group('width')),
  88                 'height': int(m.group('height')),
  89             })
  90
  91     @functools.cached_property
  92     def is_logged_in(self):
  93         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
  94
  95     def _call_api(self, path, video_id, query={}, graphql=False):
  96         cookies = self._get_cookies(self._API_BASE)
  97         headers = {}
  98
  99         csrf_cookie = cookies.get('ct0')
 100         if csrf_cookie:
 101             headers['x-csrf-token'] = csrf_cookie.value
 102
 103         if self.is_logged_in:
 104             headers.update({
 105                 'x-twitter-auth-type': 'OAuth2Session',
 106                 'x-twitter-client-language': 'en',
 107                 'x-twitter-active-user': 'yes',
 108             })
 109
 110         last_error = None
 111         for bearer_token in self._TOKENS:
 112             for first_attempt in (True, False):
 113                 headers['Authorization'] = f'Bearer {bearer_token}'
 114
 115                 if not self.is_logged_in:
 116                     if not self._TOKENS[bearer_token]:
 117                         headers.pop('x-guest-token', None)
 118                         guest_token_response = self._download_json(
 119                             self._API_BASE + 'guest/activate.json', video_id,
 120                             'Downloading guest token', data=b'', headers=headers)
 121
 122                         self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
 123                         if not self._TOKENS[bearer_token]:
 124                             raise ExtractorError('Could not retrieve guest token')
 125
 126                     headers['x-guest-token'] = self._TOKENS[bearer_token]
 127
 128                 try:
 129                     allowed_status = {400, 403, 404} if graphql else {403}
 130                     result = self._download_json(
 131                         (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 132                         video_id, headers=headers, query=query, expected_status=allowed_status)
 133
 134                 except ExtractorError as e:
 135                     if last_error:
 136                         raise last_error
 137
 138                     if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
 139                         raise
 140
 141                     last_error = e
 142                     self.report_warning(
 143                         'Twitter API gave 404 response, retrying with deprecated auth token. '
 144                         'Only one media item can be extracted')
 145                     break  # continue outer loop with next bearer_token
 146
 147                 if result.get('errors'):
 148                     errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
 149                     if first_attempt and any('bad guest token' in error.lower() for error in errors):
 150                         self.to_screen('Guest token has expired. Refreshing guest token')
 151                         self._TOKENS[bearer_token] = None
 152                         continue
 153
 154                     error_message = ', '.join(set(errors)) or 'Unknown error'
 155                     raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
 156
 157                 return result
 158
 159     def _build_graphql_query(self, media_id):
 160         raise NotImplementedError('Method must be implemented to support GraphQL')
 161
 162     def _call_graphql_api(self, endpoint, media_id):
 163         data = self._build_graphql_query(media_id)
 164         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 165         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 166
 167
 168 class TwitterCardIE(InfoExtractor):
 169     IE_NAME = 'twitter:card'
 170     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 171     _TESTS = [
 172         {
 173             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 174             # MD5 checksums are different in different places
 175             'info_dict': {
 176                 'id': '560070131976392705',
 177                 'ext': 'mp4',
 178                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 179                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 180                 'uploader': 'Twitter',
 181                 'uploader_id': 'Twitter',
 182                 'thumbnail': r're:^https?://.*\.jpg',
 183                 'duration': 30.033,
 184                 'timestamp': 1422366112,
 185                 'upload_date': '20150127',
 186                 'age_limit': 0,
 187                 'comment_count': int,
 188                 'tags': [],
 189                 'repost_count': int,
 190                 'like_count': int,
 191                 'display_id': '560070183650213889',
 192                 'uploader_url': 'https://twitter.com/Twitter',
 193             },
 194         },
 195         {
 196             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 197             'md5': '7137eca597f72b9abbe61e5ae0161399',
 198             'info_dict': {
 199                 'id': '623160978427936768',
 200                 'ext': 'mp4',
 201                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 202                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 203                 'uploader': 'NASA',
 204                 'uploader_id': 'NASA',
 205                 'timestamp': 1437408129,
 206                 'upload_date': '20150720',
 207                 'uploader_url': 'https://twitter.com/NASA',
 208                 'age_limit': 0,
 209                 'comment_count': int,
 210                 'like_count': int,
 211                 'repost_count': int,
 212                 'tags': ['PlutoFlyby'],
 213             },
 214             'params': {'format': '[protocol=https]'}
 215         },
 216         {
 217             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 218             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 219             'info_dict': {
 220                 'id': 'dq4Oj5quskI',
 221                 'ext': 'mp4',
 222                 'title': 'Ubuntu 11.10 Overview',
 223                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 224                 'upload_date': '20111013',
 225                 'uploader': 'OMG! UBUNTU!',
 226                 'uploader_id': 'omgubuntu',
 227                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 228                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 229                 'channel_follower_count': int,
 230                 'chapters': 'count:8',
 231                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 232                 'duration': 138,
 233                 'categories': ['Film & Animation'],
 234                 'age_limit': 0,
 235                 'comment_count': int,
 236                 'availability': 'public',
 237                 'like_count': int,
 238                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 239                 'view_count': int,
 240                 'tags': 'count:12',
 241                 'channel': 'OMG! UBUNTU!',
 242                 'playable_in_embed': True,
 243             },
 244             'add_ie': ['Youtube'],
 245         },
 246         {
 247             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 248             'info_dict': {
 249                 'id': 'iBb2x00UVlv',
 250                 'ext': 'mp4',
 251                 'upload_date': '20151113',
 252                 'uploader_id': '1189339351084113920',
 253                 'uploader': 'ArsenalTerje',
 254                 'title': 'Vine by ArsenalTerje',
 255                 'timestamp': 1447451307,
 256                 'alt_title': 'Vine by ArsenalTerje',
 257                 'comment_count': int,
 258                 'like_count': int,
 259                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 260                 'view_count': int,
 261                 'repost_count': int,
 262             },
 263             'add_ie': ['Vine'],
 264             'params': {'skip_download': 'm3u8'},
 265         },
 266         {
 267             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 268             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 269             'info_dict': {
 270                 'id': '705235433198714880',
 271                 'ext': 'mp4',
 272                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 273                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 274                 'uploader': 'Brent Yarina',
 275                 'uploader_id': 'BTNBrentYarina',
 276                 'timestamp': 1456976204,
 277                 'upload_date': '20160303',
 278             },
 279             'skip': 'This content is no longer available.',
 280         },
 281         {
 282             'url': 'https://twitter.com/i/videos/752274308186120192',
 283             'only_matching': True,
 284         },
 285     ]
 286
 287     def _real_extract(self, url):
 288         status_id = self._match_id(url)
 289         return self.url_result(
 290             'https://twitter.com/statuses/' + status_id,
 291             TwitterIE.ie_key(), status_id)
 292
 293
 294 class TwitterIE(TwitterBaseIE):
 295     IE_NAME = 'twitter'
 296     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?'
 297
 298     _TESTS = [{
 299         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 300         'info_dict': {
 301             'id': '643211870443208704',
 302             'display_id': '643211948184596480',
 303             'ext': 'mp4',
 304             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 305             'thumbnail': r're:^https?://.*\.jpg',
 306             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 307             'uploader': 'FREE THE NIPPLE',
 308             'uploader_id': 'freethenipple',
 309             'duration': 12.922,
 310             'timestamp': 1442188653,
 311             'upload_date': '20150913',
 312             'uploader_url': 'https://twitter.com/freethenipple',
 313             'comment_count': int,
 314             'repost_count': int,
 315             'like_count': int,
 316             'tags': [],
 317             'age_limit': 18,
 318         },
 319     }, {
 320         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 321         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 322         'info_dict': {
 323             'id': '657991469417025536',
 324             'ext': 'mp4',
 325             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 326             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 327             'thumbnail': r're:^https?://.*\.png',
 328             'uploader': 'Gifs',
 329             'uploader_id': 'giphz',
 330         },
 331         'expected_warnings': ['height', 'width'],
 332         'skip': 'Account suspended',
 333     }, {
 334         'url': 'https://twitter.com/starwars/status/665052190608723968',
 335         'info_dict': {
 336             'id': '665052190608723968',
 337             'display_id': '665052190608723968',
 338             'ext': 'mp4',
 339             'title': 'md5:e99588f17b3dd0503814ffb560e64731',
 340             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 341             'uploader_id': 'starwars',
 342             'uploader': r're:Star Wars.*',
 343             'timestamp': 1447395772,
 344             'upload_date': '20151113',
 345             'uploader_url': 'https://twitter.com/starwars',
 346             'comment_count': int,
 347             'repost_count': int,
 348             'like_count': int,
 349             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 350             'age_limit': 0,
 351         },
 352     }, {
 353         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 354         'info_dict': {
 355             'id': '705235433198714880',
 356             'ext': 'mp4',
 357             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 358             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 359             'uploader_id': 'BTNBrentYarina',
 360             'uploader': 'Brent Yarina',
 361             'timestamp': 1456976204,
 362             'upload_date': '20160303',
 363             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 364             'comment_count': int,
 365             'repost_count': int,
 366             'like_count': int,
 367             'tags': [],
 368             'age_limit': 0,
 369         },
 370         'params': {
 371             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 372             # Test case of TwitterCardIE
 373             'skip_download': True,
 374         },
 375         'skip': 'Dead external link',
 376     }, {
 377         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 378         'info_dict': {
 379             'id': '700207414000242688',
 380             'display_id': '700207533655363584',
 381             'ext': 'mp4',
 382             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 383             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 384             'thumbnail': r're:^https?://.*\.jpg',
 385             'uploader': 'jaydin donte geer',
 386             'uploader_id': 'jaydingeer',
 387             'duration': 30.0,
 388             'timestamp': 1455777459,
 389             'upload_date': '20160218',
 390             'uploader_url': 'https://twitter.com/jaydingeer',
 391             'comment_count': int,
 392             'repost_count': int,
 393             'like_count': int,
 394             'tags': ['Damndaniel'],
 395             'age_limit': 0,
 396         },
 397     }, {
 398         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 399         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 400         'info_dict': {
 401             'id': 'MIOxnrUteUd',
 402             'ext': 'mp4',
 403             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 404             'uploader': 'TAKUMA',
 405             'uploader_id': '1004126642786242560',
 406             'timestamp': 1402826626,
 407             'upload_date': '20140615',
 408             'thumbnail': r're:^https?://.*\.jpg',
 409             'alt_title': 'Vine by TAKUMA',
 410             'comment_count': int,
 411             'repost_count': int,
 412             'like_count': int,
 413             'view_count': int,
 414         },
 415         'add_ie': ['Vine'],
 416     }, {
 417         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 418         'info_dict': {
 419             'id': '717462543795523584',
 420             'display_id': '719944021058060289',
 421             'ext': 'mp4',
 422             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 423             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 424             'uploader_id': 'CaptainAmerica',
 425             'uploader': 'Captain America',
 426             'duration': 3.17,
 427             'timestamp': 1460483005,
 428             'upload_date': '20160412',
 429             'uploader_url': 'https://twitter.com/CaptainAmerica',
 430             'thumbnail': r're:^https?://.*\.jpg',
 431             'comment_count': int,
 432             'repost_count': int,
 433             'like_count': int,
 434             'tags': [],
 435             'age_limit': 0,
 436         },
 437     }, {
 438         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 439         'info_dict': {
 440             'id': '1zqKVVlkqLaKB',
 441             'ext': 'mp4',
 442             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 443             'upload_date': '20160923',
 444             'uploader_id': '1PmKqpJdOJQoY',
 445             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 446             'timestamp': 1474613214,
 447             'thumbnail': r're:^https?://.*\.jpg',
 448         },
 449         'add_ie': ['Periscope'],
 450     }, {
 451         # has mp4 formats via mobile API
 452         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 453         'info_dict': {
 454             'id': '852138619213144067',
 455             'ext': 'mp4',
 456             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 457             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 458             'uploader': 'عالم الأخبار',
 459             'uploader_id': 'news_al3alm',
 460             'duration': 277.4,
 461             'timestamp': 1492000653,
 462             'upload_date': '20170412',
 463         },
 464         'skip': 'Account suspended',
 465     }, {
 466         'url': 'https://twitter.com/i/web/status/910031516746514432',
 467         'info_dict': {
 468             'id': '910030238373089285',
 469             'display_id': '910031516746514432',
 470             'ext': 'mp4',
 471             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 472             'thumbnail': r're:^https?://.*\.jpg',
 473             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 474             'uploader': 'Préfet de Guadeloupe',
 475             'uploader_id': 'Prefet971',
 476             'duration': 47.48,
 477             'timestamp': 1505803395,
 478             'upload_date': '20170919',
 479             'uploader_url': 'https://twitter.com/Prefet971',
 480             'comment_count': int,
 481             'repost_count': int,
 482             'like_count': int,
 483             'tags': ['Maria'],
 484             'age_limit': 0,
 485         },
 486         'params': {
 487             'skip_download': True,  # requires ffmpeg
 488         },
 489     }, {
 490         # card via api.twitter.com/1.1/videos/tweet/config
 491         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 492         'info_dict': {
 493             'id': '1001551417340022785',
 494             'display_id': '1001551623938805763',
 495             'ext': 'mp4',
 496             'title': 're:.*?Shep is on a roll today.*?',
 497             'thumbnail': r're:^https?://.*\.jpg',
 498             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 499             'uploader': 'Lis Power',
 500             'uploader_id': 'LisPower1',
 501             'duration': 111.278,
 502             'timestamp': 1527623489,
 503             'upload_date': '20180529',
 504             'uploader_url': 'https://twitter.com/LisPower1',
 505             'comment_count': int,
 506             'repost_count': int,
 507             'like_count': int,
 508             'tags': [],
 509             'age_limit': 0,
 510         },
 511         'params': {
 512             'skip_download': True,  # requires ffmpeg
 513         },
 514     }, {
 515         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 516         'info_dict': {
 517             'id': '1087791272830607360',
 518             'display_id': '1087791357756956680',
 519             'ext': 'mp4',
 520             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 521             'thumbnail': r're:^https?://.*\.jpg',
 522             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 523             'uploader': 'Twitter',
 524             'uploader_id': 'Twitter',
 525             'duration': 61.567,
 526             'timestamp': 1548184644,
 527             'upload_date': '20190122',
 528             'uploader_url': 'https://twitter.com/Twitter',
 529             'comment_count': int,
 530             'repost_count': int,
 531             'like_count': int,
 532             'tags': [],
 533             'age_limit': 0,
 534         },
 535     }, {
 536         # not available in Periscope
 537         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 538         'info_dict': {
 539             'id': '1vOGwqejwoWxB',
 540             'ext': 'mp4',
 541             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 542             'uploader': 'Vivi',
 543             'uploader_id': '1eVjYOLGkGrQL',
 544             'thumbnail': r're:^https?://.*\.jpg',
 545             'tags': ['EduTECH2019'],
 546             'view_count': int,
 547         },
 548         'add_ie': ['TwitterBroadcast'],
 549     }, {
 550         # unified card
 551         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 552         'info_dict': {
 553             'id': '1349774757969989634',
 554             'display_id': '1349794411333394432',
 555             'ext': 'mp4',
 556             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 557             'thumbnail': r're:^https?://.*\.jpg',
 558             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 559             'uploader': 'Brooklyn Nets',
 560             'uploader_id': 'BrooklynNets',
 561             'duration': 324.484,
 562             'timestamp': 1610651040,
 563             'upload_date': '20210114',
 564             'uploader_url': 'https://twitter.com/BrooklynNets',
 565             'comment_count': int,
 566             'repost_count': int,
 567             'like_count': int,
 568             'tags': [],
 569             'age_limit': 0,
 570         },
 571         'params': {
 572             'skip_download': True,
 573         },
 574     }, {
 575         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 576         'info_dict': {
 577             'id': '1577855447914409984',
 578             'display_id': '1577855540407197696',
 579             'ext': 'mp4',
 580             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 581             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 582             'upload_date': '20221006',
 583             'uploader': 'oshtru',
 584             'uploader_id': 'oshtru',
 585             'uploader_url': 'https://twitter.com/oshtru',
 586             'thumbnail': r're:^https?://.*\.jpg',
 587             'duration': 30.03,
 588             'timestamp': 1665025050,
 589             'comment_count': int,
 590             'repost_count': int,
 591             'like_count': int,
 592             'tags': [],
 593             'age_limit': 0,
 594         },
 595         'params': {'skip_download': True},
 596     }, {
 597         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 598         'info_dict': {
 599             'id': '1577719286659006464',
 600             'title': 'Ultima | #\u0432\u029f\u043c - Test',
 601             'description': 'Test https://t.co/Y3KEZD7Dad',
 602             'uploader': 'Ultima | #\u0432\u029f\u043c',
 603             'uploader_id': 'UltimaShadowX',
 604             'uploader_url': 'https://twitter.com/UltimaShadowX',
 605             'upload_date': '20221005',
 606             'timestamp': 1664992565,
 607             'comment_count': int,
 608             'repost_count': int,
 609             'like_count': int,
 610             'tags': [],
 611             'age_limit': 0,
 612         },
 613         'playlist_count': 4,
 614         'params': {'skip_download': True},
 615     }, {
 616         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 617         'info_dict': {
 618             'id': '1575559336759263233',
 619             'display_id': '1575560063510810624',
 620             'ext': 'mp4',
 621             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 622             'thumbnail': r're:^https?://.*\.jpg',
 623             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 624             'uploader': 'Max Olson',
 625             'uploader_id': 'MesoMax919',
 626             'uploader_url': 'https://twitter.com/MesoMax919',
 627             'duration': 21.321,
 628             'timestamp': 1664477766,
 629             'upload_date': '20220929',
 630             'comment_count': int,
 631             'repost_count': int,
 632             'like_count': int,
 633             'tags': ['HurricaneIan'],
 634             'age_limit': 0,
 635         },
 636     }, {
 637         # Adult content, uses old token
 638         # Fails if not logged in (GraphQL)
 639         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 640         'info_dict': {
 641             'id': '1575199163847000068',
 642             'display_id': '1575199173472927762',
 643             'ext': 'mp4',
 644             'title': str,
 645             'description': str,
 646             'uploader': str,
 647             'uploader_id': 'Rizdraws',
 648             'uploader_url': 'https://twitter.com/Rizdraws',
 649             'upload_date': '20220928',
 650             'timestamp': 1664391723,
 651             'thumbnail': r're:^https?://.+\.jpg',
 652             'like_count': int,
 653             'repost_count': int,
 654             'comment_count': int,
 655             'age_limit': 18,
 656             'tags': []
 657         },
 658         'expected_warnings': ['404'],
 659     }, {
 660         # Description is missing one https://t.co url (GraphQL)
 661         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 662         'playlist_mincount': 2,
 663         'info_dict': {
 664             'id': '1395079556562706435',
 665             'title': str,
 666             'tags': [],
 667             'uploader': str,
 668             'like_count': int,
 669             'upload_date': '20210519',
 670             'age_limit': 0,
 671             'repost_count': int,
 672             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
 673             'uploader_id': 'Srirachachau',
 674             'comment_count': int,
 675             'uploader_url': 'https://twitter.com/Srirachachau',
 676             'timestamp': 1621447860,
 677         },
 678     }, {
 679         # Description is missing one https://t.co url (GraphQL)
 680         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 681         'playlist_mincount': 2,
 682         'info_dict': {
 683             'id': '1578353380363501568',
 684             'title': str,
 685             'uploader_id': 'DavidToons_',
 686             'repost_count': int,
 687             'like_count': int,
 688             'uploader': str,
 689             'timestamp': 1665143744,
 690             'uploader_url': 'https://twitter.com/DavidToons_',
 691             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
 692             'tags': [],
 693             'comment_count': int,
 694             'upload_date': '20221007',
 695             'age_limit': 0,
 696         },
 697     }, {
 698         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 699         'playlist_count': 2,
 700         'info_dict': {
 701             'id': '1578401165338976258',
 702             'title': str,
 703             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 704             'uploader': str,
 705             'uploader_id': 'primevideouk',
 706             'timestamp': 1665155137,
 707             'upload_date': '20221007',
 708             'age_limit': 0,
 709             'uploader_url': 'https://twitter.com/primevideouk',
 710             'comment_count': int,
 711             'repost_count': int,
 712             'like_count': int,
 713             'tags': ['TheRingsOfPower'],
 714         },
 715     }, {
 716         # Twitter Spaces
 717         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 718         'info_dict': {
 719             'id': '1lPJqmBeeNAJb',
 720             'ext': 'm4a',
 721             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 722             'uploader': r're:Monique Camarra.+?',
 723             'uploader_id': 'MoniqueCamarra',
 724             'live_status': 'was_live',
 725             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 726             'timestamp': 1658407771464,
 727         },
 728         'add_ie': ['TwitterSpaces'],
 729         'params': {'skip_download': 'm3u8'},
 730     }, {
 731         # URL specifies video number but --yes-playlist
 732         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 733         'playlist_mincount': 2,
 734         'info_dict': {
 735             'id': '1600649710662213632',
 736             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 737             'timestamp': 1670459604.0,
 738             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 739             'comment_count': int,
 740             'uploader_id': 'CTVJLaidlaw',
 741             'repost_count': int,
 742             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 743             'upload_date': '20221208',
 744             'age_limit': 0,
 745             'uploader': 'Jocelyn Laidlaw',
 746             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 747             'like_count': int,
 748         },
 749     }, {
 750         # URL specifies video number and --no-playlist
 751         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 752         'info_dict': {
 753             'id': '1600649511827013632',
 754             'ext': 'mp4',
 755             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 756             'thumbnail': r're:^https?://.+\.jpg',
 757             'timestamp': 1670459604.0,
 758             'uploader_id': 'CTVJLaidlaw',
 759             'uploader': 'Jocelyn Laidlaw',
 760             'repost_count': int,
 761             'comment_count': int,
 762             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 763             'duration': 102.226,
 764             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 765             'display_id': '1600649710662213632',
 766             'like_count': int,
 767             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 768             'upload_date': '20221208',
 769             'age_limit': 0,
 770         },
 771         'params': {'noplaylist': True},
 772     }, {
 773         # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
 774         # note the id different between extraction and url
 775         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 776         'info_dict': {
 777             'id': '1621117577354424321',
 778             'display_id': '1621117700482416640',
 779             'ext': 'mp4',
 780             'title': '뽀 - 아 최우제 이동속도 봐',
 781             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 782             'duration': 24.598,
 783             'uploader': '뽀',
 784             'uploader_id': 's2FAKER',
 785             'uploader_url': 'https://twitter.com/s2FAKER',
 786             'upload_date': '20230202',
 787             'timestamp': 1675339553.0,
 788             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
 789             'age_limit': 18,
 790             'tags': [],
 791             'like_count': int,
 792             'repost_count': int,
 793             'comment_count': int,
 794         },
 795     }, {
 796         # onion route
 797         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
 798         'only_matching': True,
 799     }, {
 800         # Twitch Clip Embed
 801         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 802         'only_matching': True,
 803     }, {
 804         # promo_video_website card
 805         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 806         'only_matching': True,
 807     }, {
 808         # promo_video_convo card
 809         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
 810         'only_matching': True,
 811     }, {
 812         # appplayer card
 813         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
 814         'only_matching': True,
 815     }, {
 816         # video_direct_message card
 817         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
 818         'only_matching': True,
 819     }, {
 820         # poll2choice_video card
 821         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
 822         'only_matching': True,
 823     }, {
 824         # poll3choice_video card
 825         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
 826         'only_matching': True,
 827     }, {
 828         # poll4choice_video card
 829         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
 830         'only_matching': True,
 831     }]
 832
 833     def _graphql_to_legacy(self, data, twid):
 834         result = traverse_obj(data, (
 835             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
 836             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
 837             'tweet_results', 'result', ('tweet', None),
 838         ), expected_type=dict, default={}, get_all=False)
 839
 840         if result.get('__typename') not in ('Tweet', None):
 841             self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
 842
 843         if 'tombstone' in result:
 844             cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
 845             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
 846
 847         status = result.get('legacy', {})
 848         status.update(traverse_obj(result, {
 849             'user': ('core', 'user_results', 'result', 'legacy'),
 850             'card': ('card', 'legacy'),
 851             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
 852         }, expected_type=dict, default={}))
 853
 854         # extra transformation is needed since result does not match legacy format
 855         binding_values = {
 856             binding_value.get('key'): binding_value.get('value')
 857             for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
 858         }
 859         if binding_values:
 860             status['card']['binding_values'] = binding_values
 861
 862         return status
 863
 864     def _build_graphql_query(self, media_id):
 865         return {
 866             'variables': {
 867                 'focalTweetId': media_id,
 868                 'includePromotedContent': True,
 869                 'with_rux_injections': False,
 870                 'withBirdwatchNotes': True,
 871                 'withCommunity': True,
 872                 'withDownvotePerspective': False,
 873                 'withQuickPromoteEligibilityTweetFields': True,
 874                 'withReactionsMetadata': False,
 875                 'withReactionsPerspective': False,
 876                 'withSuperFollowsTweetFields': True,
 877                 'withSuperFollowsUserFields': True,
 878                 'withV2Timeline': True,
 879                 'withVoice': True,
 880             },
 881             'features': {
 882                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
 883                 'interactive_text_enabled': True,
 884                 'responsive_web_edit_tweet_api_enabled': True,
 885                 'responsive_web_enhance_cards_enabled': True,
 886                 'responsive_web_graphql_timeline_navigation_enabled': False,
 887                 'responsive_web_text_conversations_enabled': False,
 888                 'responsive_web_uc_gql_enabled': True,
 889                 'standardized_nudges_misinfo': True,
 890                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
 891                 'tweetypie_unmention_optimization_enabled': True,
 892                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
 893                 'verified_phone_label_enabled': False,
 894                 'vibe_api_enabled': True,
 895             },
 896         }
 897
 898     def _real_extract(self, url):
 899         twid, selected_index = self._match_valid_url(url).group('id', 'index')
 900         if self.is_logged_in or self._configuration_arg('force_graphql'):
 901             self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
 902             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
 903             status = self._graphql_to_legacy(result, twid)
 904
 905         else:
 906             status = self._call_api(f'statuses/show/{twid}.json', twid, {
 907                 'cards_platform': 'Web-12',
 908                 'include_cards': 1,
 909                 'include_reply_count': 1,
 910                 'include_user_entities': 0,
 911                 'tweet_mode': 'extended',
 912             })
 913
 914         title = description = status['full_text'].replace('\n', ' ')
 915         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
 916         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
 917         user = status.get('user') or {}
 918         uploader = user.get('name')
 919         if uploader:
 920             title = f'{uploader} - {title}'
 921         uploader_id = user.get('screen_name')
 922
 923         tags = []
 924         for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
 925             hashtag_text = hashtag.get('text')
 926             if not hashtag_text:
 927                 continue
 928             tags.append(hashtag_text)
 929
 930         info = {
 931             'id': twid,
 932             'title': title,
 933             'description': description,
 934             'uploader': uploader,
 935             'timestamp': unified_timestamp(status.get('created_at')),
 936             'uploader_id': uploader_id,
 937             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
 938             'like_count': int_or_none(status.get('favorite_count')),
 939             'repost_count': int_or_none(status.get('retweet_count')),
 940             'comment_count': int_or_none(status.get('reply_count')),
 941             'age_limit': 18 if status.get('possibly_sensitive') else 0,
 942             'tags': tags,
 943         }
 944
 945         def extract_from_video_info(media):
 946             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
 947             self.write_debug(f'Extracting from video info: {media_id}')
 948             video_info = media.get('video_info') or {}
 949
 950             formats = []
 951             subtitles = {}
 952             for variant in video_info.get('variants', []):
 953                 fmts, subs = self._extract_variant_formats(variant, twid)
 954                 subtitles = self._merge_subtitles(subtitles, subs)
 955                 formats.extend(fmts)
 956
 957             thumbnails = []
 958             media_url = media.get('media_url_https') or media.get('media_url')
 959             if media_url:
 960                 def add_thumbnail(name, size):
 961                     thumbnails.append({
 962                         'id': name,
 963                         'url': update_url_query(media_url, {'name': name}),
 964                         'width': int_or_none(size.get('w') or size.get('width')),
 965                         'height': int_or_none(size.get('h') or size.get('height')),
 966                     })
 967                 for name, size in media.get('sizes', {}).items():
 968                     add_thumbnail(name, size)
 969                 add_thumbnail('orig', media.get('original_info') or {})
 970
 971             return {
 972                 'id': media_id,
 973                 'formats': formats,
 974                 'subtitles': subtitles,
 975                 'thumbnails': thumbnails,
 976                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
 977                 # The codec of http formats are unknown
 978                 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
 979             }
 980
 981         def extract_from_card_info(card):
 982             if not card:
 983                 return
 984
 985             self.write_debug(f'Extracting from card info: {card.get("url")}')
 986             binding_values = card['binding_values']
 987
 988             def get_binding_value(k):
 989                 o = binding_values.get(k) or {}
 990                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
 991
 992             card_name = card['name'].split(':')[-1]
 993             if card_name == 'player':
 994                 yield {
 995                     '_type': 'url',
 996                     'url': get_binding_value('player_url'),
 997                 }
 998             elif card_name == 'periscope_broadcast':
 999                 yield {
1000                     '_type': 'url',
1001                     'url': get_binding_value('url') or get_binding_value('player_url'),
1002                     'ie_key': PeriscopeIE.ie_key(),
1003                 }
1004             elif card_name == 'broadcast':
1005                 yield {
1006                     '_type': 'url',
1007                     'url': get_binding_value('broadcast_url'),
1008                     'ie_key': TwitterBroadcastIE.ie_key(),
1009                 }
1010             elif card_name == 'audiospace':
1011                 yield {
1012                     '_type': 'url',
1013                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1014                     'ie_key': TwitterSpacesIE.ie_key(),
1015                 }
1016             elif card_name == 'summary':
1017                 yield {
1018                     '_type': 'url',
1019                     'url': get_binding_value('card_url'),
1020                 }
1021             elif card_name == 'unified_card':
1022                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1023                 yield from map(extract_from_video_info, traverse_obj(
1024                     unified_card, ('media_entities', ...), expected_type=dict))
1025             # amplify, promo_video_website, promo_video_convo, appplayer,
1026             # video_direct_message, poll2choice_video, poll3choice_video,
1027             # poll4choice_video, ...
1028             else:
1029                 is_amplify = card_name == 'amplify'
1030                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1031                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1032                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1033
1034                 thumbnails = []
1035                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1036                     image = get_binding_value('player_image' + suffix) or {}
1037                     image_url = image.get('url')
1038                     if not image_url or '/player-placeholder' in image_url:
1039                         continue
1040                     thumbnails.append({
1041                         'id': suffix[1:] if suffix else 'medium',
1042                         'url': image_url,
1043                         'width': int_or_none(image.get('width')),
1044                         'height': int_or_none(image.get('height')),
1045                     })
1046
1047                 yield {
1048                     'formats': formats,
1049                     'subtitles': subtitles,
1050                     'thumbnails': thumbnails,
1051                     'duration': int_or_none(get_binding_value(
1052                         'content_duration_seconds')),
1053                 }
1054
1055         media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
1056         videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
1057         cards = extract_from_card_info(status.get('card'))
1058         entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
1059
1060         if not entries:
1061             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1062             if not expanded_url or expanded_url == url:
1063                 raise ExtractorError('No video could be found in this tweet', expected=True)
1064
1065             return self.url_result(expanded_url, display_id=twid, **info)
1066
1067         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1068
1069         if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1070             index = int(selected_index) - 1
1071             if index >= len(entries):
1072                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1073
1074             return entries[index]
1075
1076         if len(entries) == 1:
1077             return entries[0]
1078
1079         for index, entry in enumerate(entries, 1):
1080             entry['title'] += f' #{index}'
1081
1082         return self.playlist_result(entries, **info)
1083
1084
class TwitterAmplifyIE(TwitterBaseIE):
    # Extracts videos from amp.twimg.com pages using the vmap URL given in the page's meta tags
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Return the info dict (id, fixed title, formats, thumbnails) for an amp.twimg.com video page"""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the 'twitter:<target>:width' and 'twitter:<target>:height' meta tags
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # The player dimensions are attached to the first format only. Guard against
        # an empty format list so that it does not fail here with an IndexError
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1140
1141
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extracts broadcasts published under twitter.com/i/broadcasts/<id>
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast and its stream source, then return its info dict with formats"""
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL (if any) is used as the m3u8 format id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1178
1179
class TwitterSpacesIE(TwitterBaseIE):
    # Extracts the audio of Twitter Spaces (twitter.com/i/spaces/<id>)
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956397,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased 'state' of a space's metadata to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Build the 'variables' and 'features' of an AudioSpaceById GraphQL query for `space_id`"""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """
        Return the info dict of a Twitter Space

        Formats are only extracted when the space is neither upcoming nor in
        the 'post_live' state. Raises ExtractorError if the space is not found.
        """
        space_id = self._match_id(url)
        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # An unknown or missing state leaves live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        no_formats_reason = {
            'is_upcoming': 'Twitter Space not started yet',
            'post_live': 'Twitter Space ended but not downloadable yet',
        }.get(live_status)

        formats = []
        if no_formats_reason:
            self.raise_no_formats(no_formats_reason, expected=True)
        else:
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = stream_status['source']
            playlist_url = traverse_obj(source, 'noRedirectPlaybackUrl', 'location')

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                playlist_url, media_key, 'm4a', 'm3u8',
                live=live_status == 'is_live', headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # NOTE(review): 'created_at' is passed through unchanged; the test value above
            # (1659877956397) suggests it is in milliseconds - confirm this is intended
            'timestamp': metadata.get('created_at'),
            'formats': formats,
        }
1272
1273
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links, or the 'tco:<id>' shorthand, to the URL they redirect to
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and return a url_result for the final URL"""
        eid, link_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            # 'tco:<id>' shorthand: build the real short URL from the id
            link_id = eid
            url = self._BASE_URL + link_id
        # presumably a curl User-Agent makes t.co answer with a plain HTTP redirect - TODO confirm
        new_url = self._request_webpage(url, link_id, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading warning-page prefix; any later occurrence
            # inside the target URL must be left untouched
            # NOTE(review): the remainder is used as-is; if Twitter percent-encodes the
            # 'unsafe_link' parameter it would additionally need unquoting - confirm
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)