yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5 from .periscope import PeriscopeBaseIE, PeriscopeIE
   6 from ..compat import functools  # isort: split
   7 from ..compat import (
   8     compat_parse_qs,
   9     compat_urllib_parse_unquote,
  10     compat_urllib_parse_urlparse,
  11 )
  12 from ..utils import (
  13     ExtractorError,
  14     dict_get,
  15     float_or_none,
  16     format_field,
  17     int_or_none,
  18     make_archive_id,
  19     remove_end,
  20     str_or_none,
  21     strip_or_none,
  22     traverse_obj,
  23     try_call,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     xpath_text,
  29 )
  30
  31
  32 class TwitterBaseIE(InfoExtractor):
  33     _API_BASE = 'https://api.twitter.com/1.1/'
  34     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
  35     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  36     _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
  37     _guest_token = None
  38
  39     def _extract_variant_formats(self, variant, video_id):
  40         variant_url = variant.get('url')
  41         if not variant_url:
  42             return [], {}
  43         elif '.m3u8' in variant_url:
  44             return self._extract_m3u8_formats_and_subtitles(
  45                 variant_url, video_id, 'mp4', 'm3u8_native',
  46                 m3u8_id='hls', fatal=False)
  47         else:
  48             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
  49             f = {
  50                 'url': variant_url,
  51                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
  52                 'tbr': tbr,
  53             }
  54             self._search_dimensions_in_video_url(f, variant_url)
  55             return [f], {}
  56
  57     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
  58         vmap_url = url_or_none(vmap_url)
  59         if not vmap_url:
  60             return [], {}
  61         vmap_data = self._download_xml(vmap_url, video_id)
  62         formats = []
  63         subtitles = {}
  64         urls = []
  65         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
  66             video_variant.attrib['url'] = compat_urllib_parse_unquote(
  67                 video_variant.attrib['url'])
  68             urls.append(video_variant.attrib['url'])
  69             fmts, subs = self._extract_variant_formats(
  70                 video_variant.attrib, video_id)
  71             formats.extend(fmts)
  72             subtitles = self._merge_subtitles(subtitles, subs)
  73         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
  74         if video_url not in urls:
  75             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
  76             formats.extend(fmts)
  77             subtitles = self._merge_subtitles(subtitles, subs)
  78         return formats, subtitles
  79
  80     @staticmethod
  81     def _search_dimensions_in_video_url(a_format, video_url):
  82         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
  83         if m:
  84             a_format.update({
  85                 'width': int(m.group('width')),
  86                 'height': int(m.group('height')),
  87             })
  88
  89     @functools.cached_property
  90     def is_logged_in(self):
  91         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
  92
  93     def _call_api(self, path, video_id, query={}, graphql=False):
  94         cookies = self._get_cookies(self._API_BASE)
  95         headers = self._AUTH.copy()
  96
  97         csrf_cookie = cookies.get('ct0')
  98         if csrf_cookie:
  99             headers['x-csrf-token'] = csrf_cookie.value
 100
 101         if self.is_logged_in:
 102             headers.update({
 103                 'x-twitter-auth-type': 'OAuth2Session',
 104                 'x-twitter-client-language': 'en',
 105                 'x-twitter-active-user': 'yes',
 106             })
 107
 108         for first_attempt in (True, False):
 109             if not self.is_logged_in and not self._guest_token:
 110                 headers.pop('x-guest-token', None)
 111                 self._guest_token = traverse_obj(self._download_json(
 112                     f'{self._API_BASE}guest/activate.json', video_id,
 113                     'Downloading guest token', data=b'', headers=headers), 'guest_token')
 114             if self._guest_token:
 115                 headers['x-guest-token'] = self._guest_token
 116             elif not self.is_logged_in:
 117                 raise ExtractorError('Could not retrieve guest token')
 118
 119             allowed_status = {400, 401, 403, 404} if graphql else {403}
 120             result = self._download_json(
 121                 (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 122                 video_id, headers=headers, query=query, expected_status=allowed_status,
 123                 note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 124
 125             if result.get('errors'):
 126                 errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 127                 if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
 128                     self.to_screen('Guest token has expired. Refreshing guest token')
 129                     self._guest_token = None
 130                     continue
 131
 132                 raise ExtractorError(
 133                     f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
 134
 135             return result
 136
 137     def _build_graphql_query(self, media_id):
 138         raise NotImplementedError('Method must be implemented to support GraphQL')
 139
 140     def _call_graphql_api(self, endpoint, media_id):
 141         data = self._build_graphql_query(media_id)
 142         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 143         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 144
 145
 146 class TwitterCardIE(InfoExtractor):
 147     IE_NAME = 'twitter:card'
 148     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 149     _TESTS = [
 150         {
 151             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 152             # MD5 checksums are different in different places
 153             'info_dict': {
 154                 'id': '560070131976392705',
 155                 'ext': 'mp4',
 156                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 157                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 158                 'uploader': 'Twitter',
 159                 'uploader_id': 'Twitter',
 160                 'thumbnail': r're:^https?://.*\.jpg',
 161                 'duration': 30.033,
 162                 'timestamp': 1422366112,
 163                 'upload_date': '20150127',
 164                 'age_limit': 0,
 165                 'comment_count': int,
 166                 'tags': [],
 167                 'repost_count': int,
 168                 'like_count': int,
 169                 'display_id': '560070183650213889',
 170                 'uploader_url': 'https://twitter.com/Twitter',
 171             },
 172         },
 173         {
 174             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 175             'md5': '7137eca597f72b9abbe61e5ae0161399',
 176             'info_dict': {
 177                 'id': '623160978427936768',
 178                 'ext': 'mp4',
 179                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 180                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 181                 'uploader': 'NASA',
 182                 'uploader_id': 'NASA',
 183                 'timestamp': 1437408129,
 184                 'upload_date': '20150720',
 185                 'uploader_url': 'https://twitter.com/NASA',
 186                 'age_limit': 0,
 187                 'comment_count': int,
 188                 'like_count': int,
 189                 'repost_count': int,
 190                 'tags': ['PlutoFlyby'],
 191             },
 192             'params': {'format': '[protocol=https]'}
 193         },
 194         {
 195             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 196             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 197             'info_dict': {
 198                 'id': 'dq4Oj5quskI',
 199                 'ext': 'mp4',
 200                 'title': 'Ubuntu 11.10 Overview',
 201                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 202                 'upload_date': '20111013',
 203                 'uploader': 'OMG! UBUNTU!',
 204                 'uploader_id': 'omgubuntu',
 205                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 206                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 207                 'channel_follower_count': int,
 208                 'chapters': 'count:8',
 209                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 210                 'duration': 138,
 211                 'categories': ['Film & Animation'],
 212                 'age_limit': 0,
 213                 'comment_count': int,
 214                 'availability': 'public',
 215                 'like_count': int,
 216                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 217                 'view_count': int,
 218                 'tags': 'count:12',
 219                 'channel': 'OMG! UBUNTU!',
 220                 'playable_in_embed': True,
 221             },
 222             'add_ie': ['Youtube'],
 223         },
 224         {
 225             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 226             'info_dict': {
 227                 'id': 'iBb2x00UVlv',
 228                 'ext': 'mp4',
 229                 'upload_date': '20151113',
 230                 'uploader_id': '1189339351084113920',
 231                 'uploader': 'ArsenalTerje',
 232                 'title': 'Vine by ArsenalTerje',
 233                 'timestamp': 1447451307,
 234                 'alt_title': 'Vine by ArsenalTerje',
 235                 'comment_count': int,
 236                 'like_count': int,
 237                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 238                 'view_count': int,
 239                 'repost_count': int,
 240             },
 241             'add_ie': ['Vine'],
 242             'params': {'skip_download': 'm3u8'},
 243         },
 244         {
 245             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 246             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 247             'info_dict': {
 248                 'id': '705235433198714880',
 249                 'ext': 'mp4',
 250                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 251                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 252                 'uploader': 'Brent Yarina',
 253                 'uploader_id': 'BTNBrentYarina',
 254                 'timestamp': 1456976204,
 255                 'upload_date': '20160303',
 256             },
 257             'skip': 'This content is no longer available.',
 258         },
 259         {
 260             'url': 'https://twitter.com/i/videos/752274308186120192',
 261             'only_matching': True,
 262         },
 263     ]
 264
 265     def _real_extract(self, url):
 266         status_id = self._match_id(url)
 267         return self.url_result(
 268             'https://twitter.com/statuses/' + status_id,
 269             TwitterIE.ie_key(), status_id)
 270
 271
 272 class TwitterIE(TwitterBaseIE):
 273     IE_NAME = 'twitter'
 274     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 275
 276     _TESTS = [{
 277         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 278         'info_dict': {
 279             'id': '643211870443208704',
 280             'display_id': '643211948184596480',
 281             'ext': 'mp4',
 282             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 283             'thumbnail': r're:^https?://.*\.jpg',
 284             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 285             'uploader': 'FREE THE NIPPLE',
 286             'uploader_id': 'freethenipple',
 287             'duration': 12.922,
 288             'timestamp': 1442188653,
 289             'upload_date': '20150913',
 290             'uploader_url': 'https://twitter.com/freethenipple',
 291             'comment_count': int,
 292             'repost_count': int,
 293             'like_count': int,
 294             'view_count': int,
 295             'tags': [],
 296             'age_limit': 18,
 297         },
 298     }, {
 299         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 300         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 301         'info_dict': {
 302             'id': '657991469417025536',
 303             'ext': 'mp4',
 304             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 305             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 306             'thumbnail': r're:^https?://.*\.png',
 307             'uploader': 'Gifs',
 308             'uploader_id': 'giphz',
 309         },
 310         'expected_warnings': ['height', 'width'],
 311         'skip': 'Account suspended',
 312     }, {
 313         'url': 'https://twitter.com/starwars/status/665052190608723968',
 314         'info_dict': {
 315             'id': '665052190608723968',
 316             'display_id': '665052190608723968',
 317             'ext': 'mp4',
 318             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 319             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 320             'uploader_id': 'starwars',
 321             'uploader': r're:Star Wars.*',
 322             'timestamp': 1447395772,
 323             'upload_date': '20151113',
 324             'uploader_url': 'https://twitter.com/starwars',
 325             'comment_count': int,
 326             'repost_count': int,
 327             'like_count': int,
 328             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 329             'age_limit': 0,
 330         },
 331     }, {
 332         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 333         'info_dict': {
 334             'id': '705235433198714880',
 335             'ext': 'mp4',
 336             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 337             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 338             'uploader_id': 'BTNBrentYarina',
 339             'uploader': 'Brent Yarina',
 340             'timestamp': 1456976204,
 341             'upload_date': '20160303',
 342             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 343             'comment_count': int,
 344             'repost_count': int,
 345             'like_count': int,
 346             'tags': [],
 347             'age_limit': 0,
 348         },
 349         'params': {
 350             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 351             # Test case of TwitterCardIE
 352             'skip_download': True,
 353         },
 354         'skip': 'Dead external link',
 355     }, {
 356         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 357         'info_dict': {
 358             'id': '700207414000242688',
 359             'display_id': '700207533655363584',
 360             'ext': 'mp4',
 361             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 362             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 363             'thumbnail': r're:^https?://.*\.jpg',
 364             'uploader': 'jaydin donte geer',
 365             'uploader_id': 'jaydingeer',
 366             'duration': 30.0,
 367             'timestamp': 1455777459,
 368             'upload_date': '20160218',
 369             'uploader_url': 'https://twitter.com/jaydingeer',
 370             'comment_count': int,
 371             'repost_count': int,
 372             'like_count': int,
 373             'view_count': int,
 374             'tags': ['Damndaniel'],
 375             'age_limit': 0,
 376         },
 377     }, {
 378         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 379         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 380         'info_dict': {
 381             'id': 'MIOxnrUteUd',
 382             'ext': 'mp4',
 383             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 384             'uploader': 'TAKUMA',
 385             'uploader_id': '1004126642786242560',
 386             'timestamp': 1402826626,
 387             'upload_date': '20140615',
 388             'thumbnail': r're:^https?://.*\.jpg',
 389             'alt_title': 'Vine by TAKUMA',
 390             'comment_count': int,
 391             'repost_count': int,
 392             'like_count': int,
 393             'view_count': int,
 394         },
 395         'add_ie': ['Vine'],
 396     }, {
 397         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 398         'info_dict': {
 399             'id': '717462543795523584',
 400             'display_id': '719944021058060289',
 401             'ext': 'mp4',
 402             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 403             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 404             'uploader_id': 'CaptainAmerica',
 405             'uploader': 'Captain America',
 406             'duration': 3.17,
 407             'timestamp': 1460483005,
 408             'upload_date': '20160412',
 409             'uploader_url': 'https://twitter.com/CaptainAmerica',
 410             'thumbnail': r're:^https?://.*\.jpg',
 411             'comment_count': int,
 412             'repost_count': int,
 413             'like_count': int,
 414             'view_count': int,
 415             'tags': [],
 416             'age_limit': 0,
 417         },
 418     }, {
 419         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 420         'info_dict': {
 421             'id': '1zqKVVlkqLaKB',
 422             'ext': 'mp4',
 423             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 424             'upload_date': '20160923',
 425             'uploader_id': '1PmKqpJdOJQoY',
 426             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 427             'timestamp': 1474613214,
 428             'thumbnail': r're:^https?://.*\.jpg',
 429         },
 430         'add_ie': ['Periscope'],
 431     }, {
 432         # has mp4 formats via mobile API
 433         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 434         'info_dict': {
 435             'id': '852138619213144067',
 436             'ext': 'mp4',
 437             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 438             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 439             'uploader': 'عالم الأخبار',
 440             'uploader_id': 'news_al3alm',
 441             'duration': 277.4,
 442             'timestamp': 1492000653,
 443             'upload_date': '20170412',
 444         },
 445         'skip': 'Account suspended',
 446     }, {
 447         'url': 'https://twitter.com/i/web/status/910031516746514432',
 448         'info_dict': {
 449             'id': '910030238373089285',
 450             'display_id': '910031516746514432',
 451             'ext': 'mp4',
 452             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 453             'thumbnail': r're:^https?://.*\.jpg',
 454             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 455             'uploader': 'Préfet de Guadeloupe',
 456             'uploader_id': 'Prefet971',
 457             'duration': 47.48,
 458             'timestamp': 1505803395,
 459             'upload_date': '20170919',
 460             'uploader_url': 'https://twitter.com/Prefet971',
 461             'comment_count': int,
 462             'repost_count': int,
 463             'like_count': int,
 464             'view_count': int,
 465             'tags': ['Maria'],
 466             'age_limit': 0,
 467         },
 468         'params': {
 469             'skip_download': True,  # requires ffmpeg
 470         },
 471     }, {
 472         # card via api.twitter.com/1.1/videos/tweet/config
 473         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 474         'info_dict': {
 475             'id': '1001551417340022785',
 476             'display_id': '1001551623938805763',
 477             'ext': 'mp4',
 478             'title': 're:.*?Shep is on a roll today.*?',
 479             'thumbnail': r're:^https?://.*\.jpg',
 480             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 481             'uploader': 'Lis Power',
 482             'uploader_id': 'LisPower1',
 483             'duration': 111.278,
 484             'timestamp': 1527623489,
 485             'upload_date': '20180529',
 486             'uploader_url': 'https://twitter.com/LisPower1',
 487             'comment_count': int,
 488             'repost_count': int,
 489             'like_count': int,
 490             'view_count': int,
 491             'tags': [],
 492             'age_limit': 0,
 493         },
 494         'params': {
 495             'skip_download': True,  # requires ffmpeg
 496         },
 497     }, {
 498         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 499         'info_dict': {
 500             'id': '1087791272830607360',
 501             'display_id': '1087791357756956680',
 502             'ext': 'mp4',
 503             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 504             'thumbnail': r're:^https?://.*\.jpg',
 505             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 506             'uploader': 'Twitter',
 507             'uploader_id': 'Twitter',
 508             'duration': 61.567,
 509             'timestamp': 1548184644,
 510             'upload_date': '20190122',
 511             'uploader_url': 'https://twitter.com/Twitter',
 512             'comment_count': int,
 513             'repost_count': int,
 514             'like_count': int,
 515             'view_count': int,
 516             'tags': [],
 517             'age_limit': 0,
 518         },
 519     }, {
 520         # not available in Periscope
 521         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 522         'info_dict': {
 523             'id': '1vOGwqejwoWxB',
 524             'ext': 'mp4',
 525             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 526             'uploader': 'Vivi',
 527             'uploader_id': '1eVjYOLGkGrQL',
 528             'thumbnail': r're:^https?://.*\.jpg',
 529             'tags': ['EduTECH2019'],
 530             'view_count': int,
 531         },
 532         'add_ie': ['TwitterBroadcast'],
 533     }, {
 534         # unified card
 535         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 536         'info_dict': {
 537             'id': '1349774757969989634',
 538             'display_id': '1349794411333394432',
 539             'ext': 'mp4',
 540             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 541             'thumbnail': r're:^https?://.*\.jpg',
 542             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 543             'uploader': 'Brooklyn Nets',
 544             'uploader_id': 'BrooklynNets',
 545             'duration': 324.484,
 546             'timestamp': 1610651040,
 547             'upload_date': '20210114',
 548             'uploader_url': 'https://twitter.com/BrooklynNets',
 549             'comment_count': int,
 550             'repost_count': int,
 551             'like_count': int,
 552             'tags': [],
 553             'age_limit': 0,
 554         },
 555         'params': {
 556             'skip_download': True,
 557         },
 558     }, {
 559         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 560         'info_dict': {
 561             'id': '1577855447914409984',
 562             'display_id': '1577855540407197696',
 563             'ext': 'mp4',
 564             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 565             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 566             'upload_date': '20221006',
 567             'uploader': 'oshtru',
 568             'uploader_id': 'oshtru',
 569             'uploader_url': 'https://twitter.com/oshtru',
 570             'thumbnail': r're:^https?://.*\.jpg',
 571             'duration': 30.03,
 572             'timestamp': 1665025050,
 573             'comment_count': int,
 574             'repost_count': int,
 575             'like_count': int,
 576             'view_count': int,
 577             'tags': [],
 578             'age_limit': 0,
 579         },
 580         'params': {'skip_download': True},
 581     }, {
 582         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 583         'info_dict': {
 584             'id': '1577719286659006464',
 585             'title': 'Ultima | #\u0432\u029f\u043c - Test',
 586             'description': 'Test https://t.co/Y3KEZD7Dad',
 587             'uploader': 'Ultima | #\u0432\u029f\u043c',
 588             'uploader_id': 'UltimaShadowX',
 589             'uploader_url': 'https://twitter.com/UltimaShadowX',
 590             'upload_date': '20221005',
 591             'timestamp': 1664992565,
 592             'comment_count': int,
 593             'repost_count': int,
 594             'like_count': int,
 595             'tags': [],
 596             'age_limit': 0,
 597         },
 598         'playlist_count': 4,
 599         'params': {'skip_download': True},
 600     }, {
 601         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 602         'info_dict': {
 603             'id': '1575559336759263233',
 604             'display_id': '1575560063510810624',
 605             'ext': 'mp4',
 606             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 607             'thumbnail': r're:^https?://.*\.jpg',
 608             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 609             'uploader': 'Max Olson',
 610             'uploader_id': 'MesoMax919',
 611             'uploader_url': 'https://twitter.com/MesoMax919',
 612             'duration': 21.321,
 613             'timestamp': 1664477766,
 614             'upload_date': '20220929',
 615             'comment_count': int,
 616             'repost_count': int,
 617             'like_count': int,
 618             'view_count': int,
 619             'tags': ['HurricaneIan'],
 620             'age_limit': 0,
 621         },
 622     }, {
 623         # Adult content, fails if not logged in (GraphQL)
 624         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 625         'info_dict': {
 626             'id': '1575199163847000068',
 627             'display_id': '1575199173472927762',
 628             'ext': 'mp4',
 629             'title': str,
 630             'description': str,
 631             'uploader': str,
 632             'uploader_id': 'Rizdraws',
 633             'uploader_url': 'https://twitter.com/Rizdraws',
 634             'upload_date': '20220928',
 635             'timestamp': 1664391723,
 636             'thumbnail': r're:^https?://.+\.jpg',
 637             'like_count': int,
 638             'repost_count': int,
 639             'comment_count': int,
 640             'age_limit': 18,
 641             'tags': []
 642         },
 643         'skip': 'Requires authentication',
 644     }, {
 645         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 646         'playlist_mincount': 2,
 647         'info_dict': {
 648             'id': '1395079556562706435',
 649             'title': str,
 650             'tags': [],
 651             'uploader': str,
 652             'like_count': int,
 653             'upload_date': '20210519',
 654             'age_limit': 0,
 655             'repost_count': int,
 656             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 657             'uploader_id': 'Srirachachau',
 658             'comment_count': int,
 659             'uploader_url': 'https://twitter.com/Srirachachau',
 660             'timestamp': 1621447860,
 661         },
 662     }, {
 663         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 664         'playlist_mincount': 2,
 665         'info_dict': {
 666             'id': '1578353380363501568',
 667             'title': str,
 668             'uploader_id': 'DavidToons_',
 669             'repost_count': int,
 670             'like_count': int,
 671             'uploader': str,
 672             'timestamp': 1665143744,
 673             'uploader_url': 'https://twitter.com/DavidToons_',
 674             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 675             'tags': [],
 676             'comment_count': int,
 677             'upload_date': '20221007',
 678             'age_limit': 0,
 679         },
 680     }, {
 681         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 682         'playlist_count': 2,
 683         'info_dict': {
 684             'id': '1578401165338976258',
 685             'title': str,
 686             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 687             'uploader': str,
 688             'uploader_id': 'primevideouk',
 689             'timestamp': 1665155137,
 690             'upload_date': '20221007',
 691             'age_limit': 0,
 692             'uploader_url': 'https://twitter.com/primevideouk',
 693             'comment_count': int,
 694             'repost_count': int,
 695             'like_count': int,
 696             'tags': ['TheRingsOfPower'],
 697         },
 698     }, {
 699         # Twitter Spaces
 700         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 701         'info_dict': {
 702             'id': '1lPJqmBeeNAJb',
 703             'ext': 'm4a',
 704             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 705             'uploader': r're:Monique Camarra.+?',
 706             'uploader_id': 'MoniqueCamarra',
 707             'live_status': 'was_live',
 708             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 709             'timestamp': 1658407771464,
 710         },
 711         'add_ie': ['TwitterSpaces'],
 712         'params': {'skip_download': 'm3u8'},
 713     }, {
 714         # URL specifies video number but --yes-playlist
 715         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 716         'playlist_mincount': 2,
 717         'info_dict': {
 718             'id': '1600649710662213632',
 719             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 720             'timestamp': 1670459604.0,
 721             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 722             'comment_count': int,
 723             'uploader_id': 'CTVJLaidlaw',
 724             'repost_count': int,
 725             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 726             'upload_date': '20221208',
 727             'age_limit': 0,
 728             'uploader': 'Jocelyn Laidlaw',
 729             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 730             'like_count': int,
 731         },
 732     }, {
 733         # URL specifies video number and --no-playlist
 734         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 735         'info_dict': {
 736             'id': '1600649511827013632',
 737             'ext': 'mp4',
 738             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 739             'thumbnail': r're:^https?://.+\.jpg',
 740             'timestamp': 1670459604.0,
 741             'uploader_id': 'CTVJLaidlaw',
 742             'uploader': 'Jocelyn Laidlaw',
 743             'repost_count': int,
 744             'comment_count': int,
 745             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 746             'duration': 102.226,
 747             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 748             'display_id': '1600649710662213632',
 749             'like_count': int,
 750             'view_count': int,
 751             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 752             'upload_date': '20221208',
 753             'age_limit': 0,
 754         },
 755         'params': {'noplaylist': True},
 756     }, {
 757         # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
 758         # note the id different between extraction and url
 759         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 760         'info_dict': {
 761             'id': '1621117577354424321',
 762             'display_id': '1621117700482416640',
 763             'ext': 'mp4',
 764             'title': '뽀 - 아 최우제 이동속도 봐',
 765             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 766             'duration': 24.598,
 767             'uploader': '뽀',
 768             'uploader_id': 's2FAKER',
 769             'uploader_url': 'https://twitter.com/s2FAKER',
 770             'upload_date': '20230202',
 771             'timestamp': 1675339553.0,
 772             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
 773             'age_limit': 18,
 774             'tags': [],
 775             'like_count': int,
 776             'repost_count': int,
 777             'comment_count': int,
 778             'view_count': int,
 779         },
 780     }, {
 781         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
 782         'info_dict': {
 783             'id': '1599108643743473680',
 784             'display_id': '1599108751385972737',
 785             'ext': 'mp4',
 786             'title': '\u06ea - \U0001F48B',
 787             'uploader_url': 'https://twitter.com/hlo_again',
 788             'like_count': int,
 789             'uploader_id': 'hlo_again',
 790             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
 791             'repost_count': int,
 792             'duration': 9.531,
 793             'comment_count': int,
 794             'view_count': int,
 795             'upload_date': '20221203',
 796             'age_limit': 0,
 797             'timestamp': 1670092210.0,
 798             'tags': [],
 799             'uploader': '\u06ea',
 800             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
 801         },
 802         'params': {'noplaylist': True},
 803     }, {
 804         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
 805         'info_dict': {
 806             'id': '1600009362759733248',
 807             'display_id': '1600009574919962625',
 808             'ext': 'mp4',
 809             'uploader_url': 'https://twitter.com/MunTheShinobi',
 810             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
 811             'view_count': int,
 812             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
 813             'age_limit': 0,
 814             'uploader': 'Mün The Shinobi',
 815             'repost_count': int,
 816             'upload_date': '20221206',
 817             'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
 818             'comment_count': int,
 819             'like_count': int,
 820             'tags': [],
 821             'uploader_id': 'MunTheShinobi',
 822             'duration': 139.987,
 823             'timestamp': 1670306984.0,
 824         },
 825     }, {
 826         # url to retweet id, legacy API
 827         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
 828         'info_dict': {
 829             'id': '1623274794488659969',
 830             'display_id': '1623739803874349067',
 831             'ext': 'mp4',
 832             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
 833             'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
 834             'uploader': 'Johnny Bullets',
 835             'uploader_id': 'Johnnybull3ts',
 836             'uploader_url': 'https://twitter.com/Johnnybull3ts',
 837             'age_limit': 0,
 838             'tags': [],
 839             'duration': 8.033,
 840             'timestamp': 1675853859.0,
 841             'upload_date': '20230208',
 842             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
 843             'like_count': int,
 844             'repost_count': int,
 845             'comment_count': int,
 846         },
 847         'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
 848     }, {
 849         # onion route
 850         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
 851         'only_matching': True,
 852     }, {
 853         # Twitch Clip Embed
 854         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 855         'only_matching': True,
 856     }, {
 857         # promo_video_website card
 858         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 859         'only_matching': True,
 860     }, {
 861         # promo_video_convo card
 862         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
 863         'only_matching': True,
 864     }, {
 865         # appplayer card
 866         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
 867         'only_matching': True,
 868     }, {
 869         # video_direct_message card
 870         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
 871         'only_matching': True,
 872     }, {
 873         # poll2choice_video card
 874         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
 875         'only_matching': True,
 876     }, {
 877         # poll3choice_video card
 878         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
 879         'only_matching': True,
 880     }, {
 881         # poll4choice_video card
 882         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
 883         'only_matching': True,
 884     }]
 885
 886     def _graphql_to_legacy(self, data, twid):
 887         result = traverse_obj(data, (
 888             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
 889             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
 890             'tweet_results', 'result', ('tweet', None),
 891         ), expected_type=dict, default={}, get_all=False)
 892
 893         if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
 894             self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
 895
 896         if 'tombstone' in result:
 897             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
 898             if cause and 'adult content' in cause:
 899                 self.raise_login_required(cause)
 900             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
 901
 902         status = result.get('legacy', {})
 903         status.update(traverse_obj(result, {
 904             'user': ('core', 'user_results', 'result', 'legacy'),
 905             'card': ('card', 'legacy'),
 906             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
 907         }, expected_type=dict, default={}))
 908
 909         # extra transformation is needed since result does not match legacy format
 910         binding_values = {
 911             binding_value.get('key'): binding_value.get('value')
 912             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
 913         }
 914         if binding_values:
 915             status['card']['binding_values'] = binding_values
 916
 917         return status
 918
 919     def _build_graphql_query(self, media_id):
 920         return {
 921             'variables': {
 922                 'focalTweetId': media_id,
 923                 'includePromotedContent': True,
 924                 'with_rux_injections': False,
 925                 'withBirdwatchNotes': True,
 926                 'withCommunity': True,
 927                 'withDownvotePerspective': False,
 928                 'withQuickPromoteEligibilityTweetFields': True,
 929                 'withReactionsMetadata': False,
 930                 'withReactionsPerspective': False,
 931                 'withSuperFollowsTweetFields': True,
 932                 'withSuperFollowsUserFields': True,
 933                 'withV2Timeline': True,
 934                 'withVoice': True,
 935             },
 936             'features': {
 937                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
 938                 'interactive_text_enabled': True,
 939                 'responsive_web_edit_tweet_api_enabled': True,
 940                 'responsive_web_enhance_cards_enabled': True,
 941                 'responsive_web_graphql_timeline_navigation_enabled': False,
 942                 'responsive_web_text_conversations_enabled': False,
 943                 'responsive_web_uc_gql_enabled': True,
 944                 'standardized_nudges_misinfo': True,
 945                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
 946                 'tweetypie_unmention_optimization_enabled': True,
 947                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
 948                 'verified_phone_label_enabled': False,
 949                 'vibe_api_enabled': True,
 950             },
 951         }
 952
 953     def _real_extract(self, url):
 954         twid, selected_index = self._match_valid_url(url).group('id', 'index')
 955         if self._configuration_arg('legacy_api') and not self.is_logged_in:
 956             status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
 957                 'cards_platform': 'Web-12',
 958                 'include_cards': 1,
 959                 'include_reply_count': 1,
 960                 'include_user_entities': 0,
 961                 'tweet_mode': 'extended',
 962             }), 'retweeted_status', None)
 963         else:
 964             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
 965             status = self._graphql_to_legacy(result, twid)
 966
 967         title = description = status['full_text'].replace('\n', ' ')
 968         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
 969         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
 970         user = status.get('user') or {}
 971         uploader = user.get('name')
 972         if uploader:
 973             title = f'{uploader} - {title}'
 974         uploader_id = user.get('screen_name')
 975
 976         info = {
 977             'id': twid,
 978             'title': title,
 979             'description': description,
 980             'uploader': uploader,
 981             'timestamp': unified_timestamp(status.get('created_at')),
 982             'uploader_id': uploader_id,
 983             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
 984             'like_count': int_or_none(status.get('favorite_count')),
 985             'repost_count': int_or_none(status.get('retweet_count')),
 986             'comment_count': int_or_none(status.get('reply_count')),
 987             'age_limit': 18 if status.get('possibly_sensitive') else 0,
 988             'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
 989         }
 990
 991         def extract_from_video_info(media):
 992             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
 993             self.write_debug(f'Extracting from video info: {media_id}')
 994             video_info = media.get('video_info') or {}
 995
 996             formats = []
 997             subtitles = {}
 998             for variant in video_info.get('variants', []):
 999                 fmts, subs = self._extract_variant_formats(variant, twid)
1000                 subtitles = self._merge_subtitles(subtitles, subs)
1001                 formats.extend(fmts)
1002
1003             thumbnails = []
1004             media_url = media.get('media_url_https') or media.get('media_url')
1005             if media_url:
1006                 def add_thumbnail(name, size):
1007                     thumbnails.append({
1008                         'id': name,
1009                         'url': update_url_query(media_url, {'name': name}),
1010                         'width': int_or_none(size.get('w') or size.get('width')),
1011                         'height': int_or_none(size.get('h') or size.get('height')),
1012                     })
1013                 for name, size in media.get('sizes', {}).items():
1014                     add_thumbnail(name, size)
1015                 add_thumbnail('orig', media.get('original_info') or {})
1016
1017             return {
1018                 'id': media_id,
1019                 'formats': formats,
1020                 'subtitles': subtitles,
1021                 'thumbnails': thumbnails,
1022                 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
1023                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
1024                 # The codec of http formats are unknown
1025                 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
1026             }
1027
1028         def extract_from_card_info(card):
1029             if not card:
1030                 return
1031
1032             self.write_debug(f'Extracting from card info: {card.get("url")}')
1033             binding_values = card['binding_values']
1034
1035             def get_binding_value(k):
1036                 o = binding_values.get(k) or {}
1037                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1038
1039             card_name = card['name'].split(':')[-1]
1040             if card_name == 'player':
1041                 yield {
1042                     '_type': 'url',
1043                     'url': get_binding_value('player_url'),
1044                 }
1045             elif card_name == 'periscope_broadcast':
1046                 yield {
1047                     '_type': 'url',
1048                     'url': get_binding_value('url') or get_binding_value('player_url'),
1049                     'ie_key': PeriscopeIE.ie_key(),
1050                 }
1051             elif card_name == 'broadcast':
1052                 yield {
1053                     '_type': 'url',
1054                     'url': get_binding_value('broadcast_url'),
1055                     'ie_key': TwitterBroadcastIE.ie_key(),
1056                 }
1057             elif card_name == 'audiospace':
1058                 yield {
1059                     '_type': 'url',
1060                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1061                     'ie_key': TwitterSpacesIE.ie_key(),
1062                 }
1063             elif card_name == 'summary':
1064                 yield {
1065                     '_type': 'url',
1066                     'url': get_binding_value('card_url'),
1067                 }
1068             elif card_name == 'unified_card':
1069                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1070                 yield from map(extract_from_video_info, traverse_obj(
1071                     unified_card, ('media_entities', ...), expected_type=dict))
1072             # amplify, promo_video_website, promo_video_convo, appplayer,
1073             # video_direct_message, poll2choice_video, poll3choice_video,
1074             # poll4choice_video, ...
1075             else:
1076                 is_amplify = card_name == 'amplify'
1077                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1078                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1079                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1080
1081                 thumbnails = []
1082                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1083                     image = get_binding_value('player_image' + suffix) or {}
1084                     image_url = image.get('url')
1085                     if not image_url or '/player-placeholder' in image_url:
1086                         continue
1087                     thumbnails.append({
1088                         'id': suffix[1:] if suffix else 'medium',
1089                         'url': image_url,
1090                         'width': int_or_none(image.get('width')),
1091                         'height': int_or_none(image.get('height')),
1092                     })
1093
1094                 yield {
1095                     'formats': formats,
1096                     'subtitles': subtitles,
1097                     'thumbnails': thumbnails,
1098                     'duration': int_or_none(get_binding_value(
1099                         'content_duration_seconds')),
1100                 }
1101
1102         videos = traverse_obj(status, (
1103             (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
1104
1105         if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1106             selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1107         else:
1108             desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
1109             if not desired_obj:
1110                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1111             elif desired_obj.get('type') != 'video':
1112                 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1113
1114             # Restore original archive id and video index in title
1115             for index, entry in enumerate(videos, 1):
1116                 if entry.get('id') != desired_obj.get('id'):
1117                     continue
1118                 if index == 1:
1119                     info['_old_archive_ids'] = [make_archive_id(self, twid)]
1120                 if len(videos) != 1:
1121                     info['title'] += f' #{index}'
1122                 break
1123
1124             return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1125
1126         entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
1127         if not entries:
1128             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1129             if not expanded_url or expanded_url == url:
1130                 self.raise_no_formats('No video could be found in this tweet', expected=True)
1131                 return info
1132
1133             return self.url_result(expanded_url, display_id=twid, **info)
1134
1135         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1136
1137         if len(entries) == 1:
1138             return entries[0]
1139
1140         for index, entry in enumerate(entries, 1):
1141             entry['title'] += f' #{index}'
1142
1143         return self.playlist_result(entries, **info)
1144
1145
1146 class TwitterAmplifyIE(TwitterBaseIE):
1147     IE_NAME = 'twitter:amplify'
1148     _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
1149
1150     _TEST = {
1151         'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1152         'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
1153         'info_dict': {
1154             'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1155             'ext': 'mp4',
1156             'title': 'Twitter Video',
1157             'thumbnail': 're:^https?://.*',
1158         },
1159         'params': {'format': '[protocol=https]'},
1160     }
1161
1162     def _real_extract(self, url):
1163         video_id = self._match_id(url)
1164         webpage = self._download_webpage(url, video_id)
1165
1166         vmap_url = self._html_search_meta(
1167             'twitter:amplify:vmap', webpage, 'vmap url')
1168         formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)
1169
1170         thumbnails = []
1171         thumbnail = self._html_search_meta(
1172             'twitter:image:src', webpage, 'thumbnail', fatal=False)
1173
1174         def _find_dimension(target):
1175             w = int_or_none(self._html_search_meta(
1176                 'twitter:%s:width' % target, webpage, fatal=False))
1177             h = int_or_none(self._html_search_meta(
1178                 'twitter:%s:height' % target, webpage, fatal=False))
1179             return w, h
1180
1181         if thumbnail:
1182             thumbnail_w, thumbnail_h = _find_dimension('image')
1183             thumbnails.append({
1184                 'url': thumbnail,
1185                 'width': thumbnail_w,
1186                 'height': thumbnail_h,
1187             })
1188
1189         video_w, video_h = _find_dimension('player')
1190         formats[0].update({
1191             'width': video_w,
1192             'height': video_h,
1193         })
1194
1195         return {
1196             'id': video_id,
1197             'title': 'Twitter Video',
1198             'formats': formats,
1199             'thumbnails': thumbnails,
1200         }
1201
1202
1203 class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
1204     IE_NAME = 'twitter:broadcast'
1205     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
1206
1207     _TEST = {
1208         # untitled Periscope video
1209         'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
1210         'info_dict': {
1211             'id': '1yNGaQLWpejGj',
1212             'ext': 'mp4',
1213             'title': 'Andrea May Sahouri - Periscope Broadcast',
1214             'uploader': 'Andrea May Sahouri',
1215             'uploader_id': '1PXEdBZWpGwKe',
1216             'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
1217             'view_count': int,
1218         },
1219     }
1220
1221     def _real_extract(self, url):
1222         broadcast_id = self._match_id(url)
1223         broadcast = self._call_api(
1224             'broadcasts/show.json', broadcast_id,
1225             {'ids': broadcast_id})['broadcasts'][broadcast_id]
1226         info = self._parse_broadcast_data(broadcast, broadcast_id)
1227         media_key = broadcast['media_key']
1228         source = self._call_api(
1229             f'live_video_stream/status/{media_key}', media_key)['source']
1230         m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
1231         if '/live_video_stream/geoblocked/' in m3u8_url:
1232             self.raise_geo_restricted()
1233         m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
1234             m3u8_url).query).get('type', [None])[0]
1235         state, width, height = self._extract_common_format_info(broadcast)
1236         info['formats'] = self._extract_pscp_m3u8_formats(
1237             m3u8_url, broadcast_id, m3u8_id, state, width, height)
1238         return info
1239
1240
1241 class TwitterSpacesIE(TwitterBaseIE):
1242     IE_NAME = 'twitter:spaces'
1243     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1244
1245     _TESTS = [{
1246         'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1247         'info_dict': {
1248             'id': '1RDxlgyvNXzJL',
1249             'ext': 'm4a',
1250             'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1251             'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1252             'uploader': r're:Lucio Di Gaetano.*?',
1253             'uploader_id': 'luciodigaetano',
1254             'live_status': 'was_live',
1255             'timestamp': 1659877956397,
1256         },
1257         'params': {'skip_download': 'm3u8'},
1258     }]
1259
1260     SPACE_STATUS = {
1261         'notstarted': 'is_upcoming',
1262         'ended': 'was_live',
1263         'running': 'is_live',
1264         'timedout': 'post_live',
1265     }
1266
1267     def _build_graphql_query(self, space_id):
1268         return {
1269             'variables': {
1270                 'id': space_id,
1271                 'isMetatagsQuery': True,
1272                 'withDownvotePerspective': False,
1273                 'withReactionsMetadata': False,
1274                 'withReactionsPerspective': False,
1275                 'withReplays': True,
1276                 'withSuperFollowsUserFields': True,
1277                 'withSuperFollowsTweetFields': True,
1278             },
1279             'features': {
1280                 'dont_mention_me_view_api_enabled': True,
1281                 'interactive_text_enabled': True,
1282                 'responsive_web_edit_tweet_api_enabled': True,
1283                 'responsive_web_enhance_cards_enabled': True,
1284                 'responsive_web_uc_gql_enabled': True,
1285                 'spaces_2022_h2_clipping': True,
1286                 'spaces_2022_h2_spaces_communities': False,
1287                 'standardized_nudges_misinfo': True,
1288                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1289                 'vibe_api_enabled': True,
1290             },
1291         }
1292
1293     def _real_extract(self, url):
1294         space_id = self._match_id(url)
1295         space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
1296         if not space_data:
1297             raise ExtractorError('Twitter Space not found', expected=True)
1298
1299         metadata = space_data['metadata']
1300         live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
1301
1302         formats = []
1303         if live_status == 'is_upcoming':
1304             self.raise_no_formats('Twitter Space not started yet', expected=True)
1305         elif live_status == 'post_live':
1306             self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
1307         else:
1308             source = self._call_api(
1309                 f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']
1310
1311             # XXX: Native downloader does not work
1312             formats = self._extract_m3u8_formats(
1313                 traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
1314                 metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
1315                 headers={'Referer': 'https://twitter.com/'})
1316             for fmt in formats:
1317                 fmt.update({'vcodec': 'none', 'acodec': 'aac'})
1318
1319         participants = ', '.join(traverse_obj(
1320             space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
1321         return {
1322             'id': space_id,
1323             'title': metadata.get('title'),
1324             'description': f'Twitter Space participated by {participants}',
1325             'uploader': traverse_obj(
1326                 metadata, ('creator_results', 'result', 'legacy', 'name')),
1327             'uploader_id': traverse_obj(
1328                 metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
1329             'live_status': live_status,
1330             'timestamp': metadata.get('created_at'),
1331             'formats': formats,
1332         }
1333
1334
1335 class TwitterShortenerIE(TwitterBaseIE):
1336     IE_NAME = 'twitter:shortener'
1337     _VALID_URL = r'https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
1338     _BASE_URL = 'https://t.co/'
1339
1340     def _real_extract(self, url):
1341         mobj = self._match_valid_url(url)
1342         eid, id = mobj.group('eid', 'id')
1343         if eid:
1344             id = eid
1345             url = self._BASE_URL + id
1346         new_url = self._request_webpage(url, id, headers={'User-Agent': 'curl'}).geturl()
1347         __UNSAFE_LINK = "https://twitter.com/safety/unsafe_link_warning?unsafe_link="
1348         if new_url.startswith(__UNSAFE_LINK):
1349             new_url = new_url.replace(__UNSAFE_LINK, "")
1350         return self.url_result(new_url)