yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from .periscope import PeriscopeBaseIE, PeriscopeIE
   7 from ..compat import functools  # isort: split
   8 from ..compat import (
   9     compat_parse_qs,
  10     compat_urllib_parse_unquote,
  11     compat_urllib_parse_urlparse,
  12 )
  13 from ..utils import (
  14     ExtractorError,
  15     dict_get,
  16     float_or_none,
  17     format_field,
  18     int_or_none,
  19     make_archive_id,
  20     str_or_none,
  21     strip_or_none,
  22     traverse_obj,
  23     try_call,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     xpath_text,
  29 )
  30
  31
  32 class TwitterBaseIE(InfoExtractor):
  33     _API_BASE = 'https://api.twitter.com/1.1/'
  34     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
  35     _TOKENS = {
  36         'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
  37         'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
  38     }
  39     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  40
  41     def _extract_variant_formats(self, variant, video_id):
  42         variant_url = variant.get('url')
  43         if not variant_url:
  44             return [], {}
  45         elif '.m3u8' in variant_url:
  46             return self._extract_m3u8_formats_and_subtitles(
  47                 variant_url, video_id, 'mp4', 'm3u8_native',
  48                 m3u8_id='hls', fatal=False)
  49         else:
  50             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
  51             f = {
  52                 'url': variant_url,
  53                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
  54                 'tbr': tbr,
  55             }
  56             self._search_dimensions_in_video_url(f, variant_url)
  57             return [f], {}
  58
  59     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
  60         vmap_url = url_or_none(vmap_url)
  61         if not vmap_url:
  62             return [], {}
  63         vmap_data = self._download_xml(vmap_url, video_id)
  64         formats = []
  65         subtitles = {}
  66         urls = []
  67         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
  68             video_variant.attrib['url'] = compat_urllib_parse_unquote(
  69                 video_variant.attrib['url'])
  70             urls.append(video_variant.attrib['url'])
  71             fmts, subs = self._extract_variant_formats(
  72                 video_variant.attrib, video_id)
  73             formats.extend(fmts)
  74             subtitles = self._merge_subtitles(subtitles, subs)
  75         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
  76         if video_url not in urls:
  77             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
  78             formats.extend(fmts)
  79             subtitles = self._merge_subtitles(subtitles, subs)
  80         return formats, subtitles
  81
  82     @staticmethod
  83     def _search_dimensions_in_video_url(a_format, video_url):
  84         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
  85         if m:
  86             a_format.update({
  87                 'width': int(m.group('width')),
  88                 'height': int(m.group('height')),
  89             })
  90
  91     @functools.cached_property
  92     def is_logged_in(self):
  93         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
  94
  95     def _call_api(self, path, video_id, query={}, graphql=False):
  96         cookies = self._get_cookies(self._API_BASE)
  97         headers = {}
  98
  99         csrf_cookie = cookies.get('ct0')
 100         if csrf_cookie:
 101             headers['x-csrf-token'] = csrf_cookie.value
 102
 103         if self.is_logged_in:
 104             headers.update({
 105                 'x-twitter-auth-type': 'OAuth2Session',
 106                 'x-twitter-client-language': 'en',
 107                 'x-twitter-active-user': 'yes',
 108             })
 109
 110         last_error = None
 111         for bearer_token in self._TOKENS:
 112             for first_attempt in (True, False):
 113                 headers['Authorization'] = f'Bearer {bearer_token}'
 114
 115                 if not self.is_logged_in:
 116                     if not self._TOKENS[bearer_token]:
 117                         headers.pop('x-guest-token', None)
 118                         guest_token_response = self._download_json(
 119                             self._API_BASE + 'guest/activate.json', video_id,
 120                             'Downloading guest token', data=b'', headers=headers)
 121
 122                         self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
 123                         if not self._TOKENS[bearer_token]:
 124                             raise ExtractorError('Could not retrieve guest token')
 125
 126                     headers['x-guest-token'] = self._TOKENS[bearer_token]
 127
 128                 try:
 129                     allowed_status = {400, 403, 404} if graphql else {403}
 130                     result = self._download_json(
 131                         (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 132                         video_id, headers=headers, query=query, expected_status=allowed_status)
 133
 134                 except ExtractorError as e:
 135                     if last_error:
 136                         raise last_error
 137
 138                     if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
 139                         raise
 140
 141                     last_error = e
 142                     self.report_warning(
 143                         'Twitter API gave 404 response, retrying with deprecated auth token. '
 144                         'Only one media item can be extracted')
 145                     break  # continue outer loop with next bearer_token
 146
 147                 if result.get('errors'):
 148                     errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
 149                     if first_attempt and any('bad guest token' in error.lower() for error in errors):
 150                         self.to_screen('Guest token has expired. Refreshing guest token')
 151                         self._TOKENS[bearer_token] = None
 152                         continue
 153
 154                     error_message = ', '.join(set(errors)) or 'Unknown error'
 155                     raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)
 156
 157                 return result
 158
 159     def _build_graphql_query(self, media_id):
 160         raise NotImplementedError('Method must be implemented to support GraphQL')
 161
 162     def _call_graphql_api(self, endpoint, media_id):
 163         data = self._build_graphql_query(media_id)
 164         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 165         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 166
 167
 168 class TwitterCardIE(InfoExtractor):
 169     IE_NAME = 'twitter:card'
 170     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 171     _TESTS = [
 172         {
 173             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 174             # MD5 checksums are different in different places
 175             'info_dict': {
 176                 'id': '560070131976392705',
 177                 'ext': 'mp4',
 178                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 179                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 180                 'uploader': 'Twitter',
 181                 'uploader_id': 'Twitter',
 182                 'thumbnail': r're:^https?://.*\.jpg',
 183                 'duration': 30.033,
 184                 'timestamp': 1422366112,
 185                 'upload_date': '20150127',
 186                 'age_limit': 0,
 187                 'comment_count': int,
 188                 'tags': [],
 189                 'repost_count': int,
 190                 'like_count': int,
 191                 'display_id': '560070183650213889',
 192                 'uploader_url': 'https://twitter.com/Twitter',
 193             },
 194         },
 195         {
 196             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 197             'md5': '7137eca597f72b9abbe61e5ae0161399',
 198             'info_dict': {
 199                 'id': '623160978427936768',
 200                 'ext': 'mp4',
 201                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 202                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 203                 'uploader': 'NASA',
 204                 'uploader_id': 'NASA',
 205                 'timestamp': 1437408129,
 206                 'upload_date': '20150720',
 207                 'uploader_url': 'https://twitter.com/NASA',
 208                 'age_limit': 0,
 209                 'comment_count': int,
 210                 'like_count': int,
 211                 'repost_count': int,
 212                 'tags': ['PlutoFlyby'],
 213             },
 214             'params': {'format': '[protocol=https]'}
 215         },
 216         {
 217             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 218             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 219             'info_dict': {
 220                 'id': 'dq4Oj5quskI',
 221                 'ext': 'mp4',
 222                 'title': 'Ubuntu 11.10 Overview',
 223                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 224                 'upload_date': '20111013',
 225                 'uploader': 'OMG! UBUNTU!',
 226                 'uploader_id': 'omgubuntu',
 227                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 228                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 229                 'channel_follower_count': int,
 230                 'chapters': 'count:8',
 231                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 232                 'duration': 138,
 233                 'categories': ['Film & Animation'],
 234                 'age_limit': 0,
 235                 'comment_count': int,
 236                 'availability': 'public',
 237                 'like_count': int,
 238                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 239                 'view_count': int,
 240                 'tags': 'count:12',
 241                 'channel': 'OMG! UBUNTU!',
 242                 'playable_in_embed': True,
 243             },
 244             'add_ie': ['Youtube'],
 245         },
 246         {
 247             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 248             'info_dict': {
 249                 'id': 'iBb2x00UVlv',
 250                 'ext': 'mp4',
 251                 'upload_date': '20151113',
 252                 'uploader_id': '1189339351084113920',
 253                 'uploader': 'ArsenalTerje',
 254                 'title': 'Vine by ArsenalTerje',
 255                 'timestamp': 1447451307,
 256                 'alt_title': 'Vine by ArsenalTerje',
 257                 'comment_count': int,
 258                 'like_count': int,
 259                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 260                 'view_count': int,
 261                 'repost_count': int,
 262             },
 263             'add_ie': ['Vine'],
 264             'params': {'skip_download': 'm3u8'},
 265         },
 266         {
 267             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 268             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 269             'info_dict': {
 270                 'id': '705235433198714880',
 271                 'ext': 'mp4',
 272                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 273                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 274                 'uploader': 'Brent Yarina',
 275                 'uploader_id': 'BTNBrentYarina',
 276                 'timestamp': 1456976204,
 277                 'upload_date': '20160303',
 278             },
 279             'skip': 'This content is no longer available.',
 280         },
 281         {
 282             'url': 'https://twitter.com/i/videos/752274308186120192',
 283             'only_matching': True,
 284         },
 285     ]
 286
 287     def _real_extract(self, url):
 288         status_id = self._match_id(url)
 289         return self.url_result(
 290             'https://twitter.com/statuses/' + status_id,
 291             TwitterIE.ie_key(), status_id)
 292
 293
 294 class TwitterIE(TwitterBaseIE):
 295     IE_NAME = 'twitter'
 296     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 297
 298     _TESTS = [{
 299         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 300         'info_dict': {
 301             'id': '643211870443208704',
 302             'display_id': '643211948184596480',
 303             'ext': 'mp4',
 304             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 305             'thumbnail': r're:^https?://.*\.jpg',
 306             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 307             'uploader': 'FREE THE NIPPLE',
 308             'uploader_id': 'freethenipple',
 309             'duration': 12.922,
 310             'timestamp': 1442188653,
 311             'upload_date': '20150913',
 312             'uploader_url': 'https://twitter.com/freethenipple',
 313             'comment_count': int,
 314             'repost_count': int,
 315             'like_count': int,
 316             'tags': [],
 317             'age_limit': 18,
 318         },
 319     }, {
 320         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 321         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 322         'info_dict': {
 323             'id': '657991469417025536',
 324             'ext': 'mp4',
 325             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 326             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 327             'thumbnail': r're:^https?://.*\.png',
 328             'uploader': 'Gifs',
 329             'uploader_id': 'giphz',
 330         },
 331         'expected_warnings': ['height', 'width'],
 332         'skip': 'Account suspended',
 333     }, {
 334         'url': 'https://twitter.com/starwars/status/665052190608723968',
 335         'info_dict': {
 336             'id': '665052190608723968',
 337             'display_id': '665052190608723968',
 338             'ext': 'mp4',
 339             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 340             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 341             'uploader_id': 'starwars',
 342             'uploader': r're:Star Wars.*',
 343             'timestamp': 1447395772,
 344             'upload_date': '20151113',
 345             'uploader_url': 'https://twitter.com/starwars',
 346             'comment_count': int,
 347             'repost_count': int,
 348             'like_count': int,
 349             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 350             'age_limit': 0,
 351         },
 352     }, {
 353         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 354         'info_dict': {
 355             'id': '705235433198714880',
 356             'ext': 'mp4',
 357             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 358             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 359             'uploader_id': 'BTNBrentYarina',
 360             'uploader': 'Brent Yarina',
 361             'timestamp': 1456976204,
 362             'upload_date': '20160303',
 363             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 364             'comment_count': int,
 365             'repost_count': int,
 366             'like_count': int,
 367             'tags': [],
 368             'age_limit': 0,
 369         },
 370         'params': {
 371             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 372             # Test case of TwitterCardIE
 373             'skip_download': True,
 374         },
 375         'skip': 'Dead external link',
 376     }, {
 377         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 378         'info_dict': {
 379             'id': '700207414000242688',
 380             'display_id': '700207533655363584',
 381             'ext': 'mp4',
 382             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 383             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 384             'thumbnail': r're:^https?://.*\.jpg',
 385             'uploader': 'jaydin donte geer',
 386             'uploader_id': 'jaydingeer',
 387             'duration': 30.0,
 388             'timestamp': 1455777459,
 389             'upload_date': '20160218',
 390             'uploader_url': 'https://twitter.com/jaydingeer',
 391             'comment_count': int,
 392             'repost_count': int,
 393             'like_count': int,
 394             'tags': ['Damndaniel'],
 395             'age_limit': 0,
 396         },
 397     }, {
 398         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 399         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 400         'info_dict': {
 401             'id': 'MIOxnrUteUd',
 402             'ext': 'mp4',
 403             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 404             'uploader': 'TAKUMA',
 405             'uploader_id': '1004126642786242560',
 406             'timestamp': 1402826626,
 407             'upload_date': '20140615',
 408             'thumbnail': r're:^https?://.*\.jpg',
 409             'alt_title': 'Vine by TAKUMA',
 410             'comment_count': int,
 411             'repost_count': int,
 412             'like_count': int,
 413             'view_count': int,
 414         },
 415         'add_ie': ['Vine'],
 416     }, {
 417         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 418         'info_dict': {
 419             'id': '717462543795523584',
 420             'display_id': '719944021058060289',
 421             'ext': 'mp4',
 422             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 423             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 424             'uploader_id': 'CaptainAmerica',
 425             'uploader': 'Captain America',
 426             'duration': 3.17,
 427             'timestamp': 1460483005,
 428             'upload_date': '20160412',
 429             'uploader_url': 'https://twitter.com/CaptainAmerica',
 430             'thumbnail': r're:^https?://.*\.jpg',
 431             'comment_count': int,
 432             'repost_count': int,
 433             'like_count': int,
 434             'tags': [],
 435             'age_limit': 0,
 436         },
 437     }, {
 438         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 439         'info_dict': {
 440             'id': '1zqKVVlkqLaKB',
 441             'ext': 'mp4',
 442             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 443             'upload_date': '20160923',
 444             'uploader_id': '1PmKqpJdOJQoY',
 445             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 446             'timestamp': 1474613214,
 447             'thumbnail': r're:^https?://.*\.jpg',
 448         },
 449         'add_ie': ['Periscope'],
 450     }, {
 451         # has mp4 formats via mobile API
 452         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 453         'info_dict': {
 454             'id': '852138619213144067',
 455             'ext': 'mp4',
 456             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 457             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 458             'uploader': 'عالم الأخبار',
 459             'uploader_id': 'news_al3alm',
 460             'duration': 277.4,
 461             'timestamp': 1492000653,
 462             'upload_date': '20170412',
 463         },
 464         'skip': 'Account suspended',
 465     }, {
 466         'url': 'https://twitter.com/i/web/status/910031516746514432',
 467         'info_dict': {
 468             'id': '910030238373089285',
 469             'display_id': '910031516746514432',
 470             'ext': 'mp4',
 471             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 472             'thumbnail': r're:^https?://.*\.jpg',
 473             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 474             'uploader': 'Préfet de Guadeloupe',
 475             'uploader_id': 'Prefet971',
 476             'duration': 47.48,
 477             'timestamp': 1505803395,
 478             'upload_date': '20170919',
 479             'uploader_url': 'https://twitter.com/Prefet971',
 480             'comment_count': int,
 481             'repost_count': int,
 482             'like_count': int,
 483             'tags': ['Maria'],
 484             'age_limit': 0,
 485         },
 486         'params': {
 487             'skip_download': True,  # requires ffmpeg
 488         },
 489     }, {
 490         # card via api.twitter.com/1.1/videos/tweet/config
 491         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 492         'info_dict': {
 493             'id': '1001551417340022785',
 494             'display_id': '1001551623938805763',
 495             'ext': 'mp4',
 496             'title': 're:.*?Shep is on a roll today.*?',
 497             'thumbnail': r're:^https?://.*\.jpg',
 498             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 499             'uploader': 'Lis Power',
 500             'uploader_id': 'LisPower1',
 501             'duration': 111.278,
 502             'timestamp': 1527623489,
 503             'upload_date': '20180529',
 504             'uploader_url': 'https://twitter.com/LisPower1',
 505             'comment_count': int,
 506             'repost_count': int,
 507             'like_count': int,
 508             'tags': [],
 509             'age_limit': 0,
 510         },
 511         'params': {
 512             'skip_download': True,  # requires ffmpeg
 513         },
 514     }, {
 515         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 516         'info_dict': {
 517             'id': '1087791272830607360',
 518             'display_id': '1087791357756956680',
 519             'ext': 'mp4',
 520             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 521             'thumbnail': r're:^https?://.*\.jpg',
 522             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 523             'uploader': 'Twitter',
 524             'uploader_id': 'Twitter',
 525             'duration': 61.567,
 526             'timestamp': 1548184644,
 527             'upload_date': '20190122',
 528             'uploader_url': 'https://twitter.com/Twitter',
 529             'comment_count': int,
 530             'repost_count': int,
 531             'like_count': int,
 532             'tags': [],
 533             'age_limit': 0,
 534         },
 535     }, {
 536         # not available in Periscope
 537         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 538         'info_dict': {
 539             'id': '1vOGwqejwoWxB',
 540             'ext': 'mp4',
 541             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 542             'uploader': 'Vivi',
 543             'uploader_id': '1eVjYOLGkGrQL',
 544             'thumbnail': r're:^https?://.*\.jpg',
 545             'tags': ['EduTECH2019'],
 546             'view_count': int,
 547         },
 548         'add_ie': ['TwitterBroadcast'],
 549     }, {
 550         # unified card
 551         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 552         'info_dict': {
 553             'id': '1349774757969989634',
 554             'display_id': '1349794411333394432',
 555             'ext': 'mp4',
 556             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 557             'thumbnail': r're:^https?://.*\.jpg',
 558             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 559             'uploader': 'Brooklyn Nets',
 560             'uploader_id': 'BrooklynNets',
 561             'duration': 324.484,
 562             'timestamp': 1610651040,
 563             'upload_date': '20210114',
 564             'uploader_url': 'https://twitter.com/BrooklynNets',
 565             'comment_count': int,
 566             'repost_count': int,
 567             'like_count': int,
 568             'tags': [],
 569             'age_limit': 0,
 570         },
 571         'params': {
 572             'skip_download': True,
 573         },
 574     }, {
 575         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 576         'info_dict': {
 577             'id': '1577855447914409984',
 578             'display_id': '1577855540407197696',
 579             'ext': 'mp4',
 580             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 581             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 582             'upload_date': '20221006',
 583             'uploader': 'oshtru',
 584             'uploader_id': 'oshtru',
 585             'uploader_url': 'https://twitter.com/oshtru',
 586             'thumbnail': r're:^https?://.*\.jpg',
 587             'duration': 30.03,
 588             'timestamp': 1665025050,
 589             'comment_count': int,
 590             'repost_count': int,
 591             'like_count': int,
 592             'tags': [],
 593             'age_limit': 0,
 594         },
 595         'params': {'skip_download': True},
 596     }, {
 597         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 598         'info_dict': {
 599             'id': '1577719286659006464',
 600             'title': 'Ultima | #\u0432\u029f\u043c - Test',
 601             'description': 'Test https://t.co/Y3KEZD7Dad',
 602             'uploader': 'Ultima | #\u0432\u029f\u043c',
 603             'uploader_id': 'UltimaShadowX',
 604             'uploader_url': 'https://twitter.com/UltimaShadowX',
 605             'upload_date': '20221005',
 606             'timestamp': 1664992565,
 607             'comment_count': int,
 608             'repost_count': int,
 609             'like_count': int,
 610             'tags': [],
 611             'age_limit': 0,
 612         },
 613         'playlist_count': 4,
 614         'params': {'skip_download': True},
 615     }, {
 616         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 617         'info_dict': {
 618             'id': '1575559336759263233',
 619             'display_id': '1575560063510810624',
 620             'ext': 'mp4',
 621             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 622             'thumbnail': r're:^https?://.*\.jpg',
 623             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 624             'uploader': 'Max Olson',
 625             'uploader_id': 'MesoMax919',
 626             'uploader_url': 'https://twitter.com/MesoMax919',
 627             'duration': 21.321,
 628             'timestamp': 1664477766,
 629             'upload_date': '20220929',
 630             'comment_count': int,
 631             'repost_count': int,
 632             'like_count': int,
 633             'tags': ['HurricaneIan'],
 634             'age_limit': 0,
 635         },
 636     }, {
 637         # Adult content, uses old token
 638         # Fails if not logged in (GraphQL)
 639         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 640         'info_dict': {
 641             'id': '1575199163847000068',
 642             'display_id': '1575199173472927762',
 643             'ext': 'mp4',
 644             'title': str,
 645             'description': str,
 646             'uploader': str,
 647             'uploader_id': 'Rizdraws',
 648             'uploader_url': 'https://twitter.com/Rizdraws',
 649             'upload_date': '20220928',
 650             'timestamp': 1664391723,
 651             'thumbnail': r're:^https?://.+\.jpg',
 652             'like_count': int,
 653             'repost_count': int,
 654             'comment_count': int,
 655             'age_limit': 18,
 656             'tags': []
 657         },
 658         'expected_warnings': ['404'],
 659     }, {
 660         # Description is missing one https://t.co url (GraphQL)
 661         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 662         'playlist_mincount': 2,
 663         'info_dict': {
 664             'id': '1395079556562706435',
 665             'title': str,
 666             'tags': [],
 667             'uploader': str,
 668             'like_count': int,
 669             'upload_date': '20210519',
 670             'age_limit': 0,
 671             'repost_count': int,
 672             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
 673             'uploader_id': 'Srirachachau',
 674             'comment_count': int,
 675             'uploader_url': 'https://twitter.com/Srirachachau',
 676             'timestamp': 1621447860,
 677         },
 678     }, {
 679         # Description is missing one https://t.co url (GraphQL)
 680         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 681         'playlist_mincount': 2,
 682         'info_dict': {
 683             'id': '1578353380363501568',
 684             'title': str,
 685             'uploader_id': 'DavidToons_',
 686             'repost_count': int,
 687             'like_count': int,
 688             'uploader': str,
 689             'timestamp': 1665143744,
 690             'uploader_url': 'https://twitter.com/DavidToons_',
 691             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
 692             'tags': [],
 693             'comment_count': int,
 694             'upload_date': '20221007',
 695             'age_limit': 0,
 696         },
 697     }, {
 698         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 699         'playlist_count': 2,
 700         'info_dict': {
 701             'id': '1578401165338976258',
 702             'title': str,
 703             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 704             'uploader': str,
 705             'uploader_id': 'primevideouk',
 706             'timestamp': 1665155137,
 707             'upload_date': '20221007',
 708             'age_limit': 0,
 709             'uploader_url': 'https://twitter.com/primevideouk',
 710             'comment_count': int,
 711             'repost_count': int,
 712             'like_count': int,
 713             'tags': ['TheRingsOfPower'],
 714         },
 715     }, {
 716         # Twitter Spaces
 717         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 718         'info_dict': {
 719             'id': '1lPJqmBeeNAJb',
 720             'ext': 'm4a',
 721             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 722             'uploader': r're:Monique Camarra.+?',
 723             'uploader_id': 'MoniqueCamarra',
 724             'live_status': 'was_live',
 725             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 726             'timestamp': 1658407771464,
 727         },
 728         'add_ie': ['TwitterSpaces'],
 729         'params': {'skip_download': 'm3u8'},
 730     }, {
 731         # URL specifies video number but --yes-playlist
 732         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 733         'playlist_mincount': 2,
 734         'info_dict': {
 735             'id': '1600649710662213632',
 736             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 737             'timestamp': 1670459604.0,
 738             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 739             'comment_count': int,
 740             'uploader_id': 'CTVJLaidlaw',
 741             'repost_count': int,
 742             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 743             'upload_date': '20221208',
 744             'age_limit': 0,
 745             'uploader': 'Jocelyn Laidlaw',
 746             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 747             'like_count': int,
 748         },
 749     }, {
 750         # URL specifies video number and --no-playlist
 751         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 752         'info_dict': {
 753             'id': '1600649511827013632',
 754             'ext': 'mp4',
 755             'title': 'md5:dac4f4d4c591fcc4e88a253eba472dc3',
 756             'thumbnail': r're:^https?://.+\.jpg',
 757             'timestamp': 1670459604.0,
 758             'uploader_id': 'CTVJLaidlaw',
 759             'uploader': 'Jocelyn Laidlaw',
 760             'repost_count': int,
 761             'comment_count': int,
 762             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 763             'duration': 102.226,
 764             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 765             'display_id': '1600649710662213632',
 766             'like_count': int,
 767             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 768             'upload_date': '20221208',
 769             'age_limit': 0,
 770         },
 771         'params': {'noplaylist': True},
 772     }, {
 773         # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
 774         # note the id different between extraction and url
 775         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 776         'info_dict': {
 777             'id': '1621117577354424321',
 778             'display_id': '1621117700482416640',
 779             'ext': 'mp4',
 780             'title': '뽀 - 아 최우제 이동속도 봐',
 781             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 782             'duration': 24.598,
 783             'uploader': '뽀',
 784             'uploader_id': 's2FAKER',
 785             'uploader_url': 'https://twitter.com/s2FAKER',
 786             'upload_date': '20230202',
 787             'timestamp': 1675339553.0,
 788             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
 789             'age_limit': 18,
 790             'tags': [],
 791             'like_count': int,
 792             'repost_count': int,
 793             'comment_count': int,
 794         },
 795     }, {
 796         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
 797         'info_dict': {
 798             'id': '1599108643743473680',
 799             'display_id': '1599108751385972737',
 800             'ext': 'mp4',
 801             'title': '\u06ea - \U0001F48B',
 802             'uploader_url': 'https://twitter.com/hlo_again',
 803             'like_count': int,
 804             'uploader_id': 'hlo_again',
 805             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
 806             'repost_count': int,
 807             'duration': 9.531,
 808             'comment_count': int,
 809             'upload_date': '20221203',
 810             'age_limit': 0,
 811             'timestamp': 1670092210.0,
 812             'tags': [],
 813             'uploader': '\u06ea',
 814             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
 815         },
 816         'params': {'noplaylist': True},
 817     }, {
 818         # Media view count is GraphQL only, force in test
 819         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
 820         'info_dict': {
 821             'id': '1600009362759733248',
 822             'display_id': '1600009574919962625',
 823             'ext': 'mp4',
 824             'uploader_url': 'https://twitter.com/MunTheShinobi',
 825             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
 826             'view_count': int,
 827             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
 828             'age_limit': 0,
 829             'uploader': 'Mün The Shinobi | BlaqBoi\'s Therapist',
 830             'repost_count': int,
 831             'upload_date': '20221206',
 832             'title': 'Mün The Shinobi | BlaqBoi\'s Therapist - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
 833             'comment_count': int,
 834             'like_count': int,
 835             'tags': [],
 836             'uploader_id': 'MunTheShinobi',
 837             'duration': 139.987,
 838             'timestamp': 1670306984.0,
 839         },
 840         'params': {'extractor_args': {'twitter': {'force_graphql': ['']}}},
 841     }, {
 842         # onion route
 843         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
 844         'only_matching': True,
 845     }, {
 846         # Twitch Clip Embed
 847         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 848         'only_matching': True,
 849     }, {
 850         # promo_video_website card
 851         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 852         'only_matching': True,
 853     }, {
 854         # promo_video_convo card
 855         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
 856         'only_matching': True,
 857     }, {
 858         # appplayer card
 859         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
 860         'only_matching': True,
 861     }, {
 862         # video_direct_message card
 863         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
 864         'only_matching': True,
 865     }, {
 866         # poll2choice_video card
 867         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
 868         'only_matching': True,
 869     }, {
 870         # poll3choice_video card
 871         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
 872         'only_matching': True,
 873     }, {
 874         # poll4choice_video card
 875         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
 876         'only_matching': True,
 877     }]
 878
 879     def _graphql_to_legacy(self, data, twid):
 880         result = traverse_obj(data, (
 881             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
 882             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
 883             'tweet_results', 'result', ('tweet', None),
 884         ), expected_type=dict, default={}, get_all=False)
 885
 886         if result.get('__typename') not in ('Tweet', None):
 887             self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
 888
 889         if 'tombstone' in result:
 890             cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
 891             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
 892
 893         status = result.get('legacy', {})
 894         status.update(traverse_obj(result, {
 895             'user': ('core', 'user_results', 'result', 'legacy'),
 896             'card': ('card', 'legacy'),
 897             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
 898         }, expected_type=dict, default={}))
 899
 900         # extra transformation is needed since result does not match legacy format
 901         binding_values = {
 902             binding_value.get('key'): binding_value.get('value')
 903             for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
 904         }
 905         if binding_values:
 906             status['card']['binding_values'] = binding_values
 907
 908         return status
 909
 910     def _build_graphql_query(self, media_id):
 911         return {
 912             'variables': {
 913                 'focalTweetId': media_id,
 914                 'includePromotedContent': True,
 915                 'with_rux_injections': False,
 916                 'withBirdwatchNotes': True,
 917                 'withCommunity': True,
 918                 'withDownvotePerspective': False,
 919                 'withQuickPromoteEligibilityTweetFields': True,
 920                 'withReactionsMetadata': False,
 921                 'withReactionsPerspective': False,
 922                 'withSuperFollowsTweetFields': True,
 923                 'withSuperFollowsUserFields': True,
 924                 'withV2Timeline': True,
 925                 'withVoice': True,
 926             },
 927             'features': {
 928                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
 929                 'interactive_text_enabled': True,
 930                 'responsive_web_edit_tweet_api_enabled': True,
 931                 'responsive_web_enhance_cards_enabled': True,
 932                 'responsive_web_graphql_timeline_navigation_enabled': False,
 933                 'responsive_web_text_conversations_enabled': False,
 934                 'responsive_web_uc_gql_enabled': True,
 935                 'standardized_nudges_misinfo': True,
 936                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
 937                 'tweetypie_unmention_optimization_enabled': True,
 938                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
 939                 'verified_phone_label_enabled': False,
 940                 'vibe_api_enabled': True,
 941             },
 942         }
 943
 944     def _real_extract(self, url):
 945         twid, selected_index = self._match_valid_url(url).group('id', 'index')
 946         if self.is_logged_in or self._configuration_arg('force_graphql'):
 947             self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
 948             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
 949             status = self._graphql_to_legacy(result, twid)
 950
 951         else:
 952             status = self._call_api(f'statuses/show/{twid}.json', twid, {
 953                 'cards_platform': 'Web-12',
 954                 'include_cards': 1,
 955                 'include_reply_count': 1,
 956                 'include_user_entities': 0,
 957                 'tweet_mode': 'extended',
 958             })
 959
 960         title = description = status['full_text'].replace('\n', ' ')
 961         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
 962         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
 963         user = status.get('user') or {}
 964         uploader = user.get('name')
 965         if uploader:
 966             title = f'{uploader} - {title}'
 967         uploader_id = user.get('screen_name')
 968
 969         info = {
 970             'id': twid,
 971             'title': title,
 972             'description': description,
 973             'uploader': uploader,
 974             'timestamp': unified_timestamp(status.get('created_at')),
 975             'uploader_id': uploader_id,
 976             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
 977             'like_count': int_or_none(status.get('favorite_count')),
 978             'repost_count': int_or_none(status.get('retweet_count')),
 979             'comment_count': int_or_none(status.get('reply_count')),
 980             'age_limit': 18 if status.get('possibly_sensitive') else 0,
 981             'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
 982         }
 983
 984         def extract_from_video_info(media):
 985             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
 986             self.write_debug(f'Extracting from video info: {media_id}')
 987             video_info = media.get('video_info') or {}
 988
 989             formats = []
 990             subtitles = {}
 991             for variant in video_info.get('variants', []):
 992                 fmts, subs = self._extract_variant_formats(variant, twid)
 993                 subtitles = self._merge_subtitles(subtitles, subs)
 994                 formats.extend(fmts)
 995
 996             thumbnails = []
 997             media_url = media.get('media_url_https') or media.get('media_url')
 998             if media_url:
 999                 def add_thumbnail(name, size):
1000                     thumbnails.append({
1001                         'id': name,
1002                         'url': update_url_query(media_url, {'name': name}),
1003                         'width': int_or_none(size.get('w') or size.get('width')),
1004                         'height': int_or_none(size.get('h') or size.get('height')),
1005                     })
1006                 for name, size in media.get('sizes', {}).items():
1007                     add_thumbnail(name, size)
1008                 add_thumbnail('orig', media.get('original_info') or {})
1009
1010             return {
1011                 'id': media_id,
1012                 'formats': formats,
1013                 'subtitles': subtitles,
1014                 'thumbnails': thumbnails,
1015                 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
1016                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
1017                 # The codec of http formats are unknown
1018                 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
1019             }
1020
1021         def extract_from_card_info(card):
1022             if not card:
1023                 return
1024
1025             self.write_debug(f'Extracting from card info: {card.get("url")}')
1026             binding_values = card['binding_values']
1027
1028             def get_binding_value(k):
1029                 o = binding_values.get(k) or {}
1030                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1031
1032             card_name = card['name'].split(':')[-1]
1033             if card_name == 'player':
1034                 yield {
1035                     '_type': 'url',
1036                     'url': get_binding_value('player_url'),
1037                 }
1038             elif card_name == 'periscope_broadcast':
1039                 yield {
1040                     '_type': 'url',
1041                     'url': get_binding_value('url') or get_binding_value('player_url'),
1042                     'ie_key': PeriscopeIE.ie_key(),
1043                 }
1044             elif card_name == 'broadcast':
1045                 yield {
1046                     '_type': 'url',
1047                     'url': get_binding_value('broadcast_url'),
1048                     'ie_key': TwitterBroadcastIE.ie_key(),
1049                 }
1050             elif card_name == 'audiospace':
1051                 yield {
1052                     '_type': 'url',
1053                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1054                     'ie_key': TwitterSpacesIE.ie_key(),
1055                 }
1056             elif card_name == 'summary':
1057                 yield {
1058                     '_type': 'url',
1059                     'url': get_binding_value('card_url'),
1060                 }
1061             elif card_name == 'unified_card':
1062                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1063                 yield from map(extract_from_video_info, traverse_obj(
1064                     unified_card, ('media_entities', ...), expected_type=dict))
1065             # amplify, promo_video_website, promo_video_convo, appplayer,
1066             # video_direct_message, poll2choice_video, poll3choice_video,
1067             # poll4choice_video, ...
1068             else:
1069                 is_amplify = card_name == 'amplify'
1070                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1071                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1072                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1073
1074                 thumbnails = []
1075                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1076                     image = get_binding_value('player_image' + suffix) or {}
1077                     image_url = image.get('url')
1078                     if not image_url or '/player-placeholder' in image_url:
1079                         continue
1080                     thumbnails.append({
1081                         'id': suffix[1:] if suffix else 'medium',
1082                         'url': image_url,
1083                         'width': int_or_none(image.get('width')),
1084                         'height': int_or_none(image.get('height')),
1085                     })
1086
1087                 yield {
1088                     'formats': formats,
1089                     'subtitles': subtitles,
1090                     'thumbnails': thumbnails,
1091                     'duration': int_or_none(get_binding_value(
1092                         'content_duration_seconds')),
1093                 }
1094
1095         videos = traverse_obj(status, (
1096             (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
1097
1098         if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1099             selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1100         else:
1101             desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
1102             if not desired_obj:
1103                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1104             elif desired_obj.get('type') != 'video':
1105                 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1106
1107             # Restore original archive id and video index in title
1108             for index, entry in enumerate(videos, 1):
1109                 if entry.get('id') != desired_obj.get('id'):
1110                     continue
1111                 if index == 1:
1112                     info['_old_archive_ids'] = [make_archive_id(self, twid)]
1113                 if len(videos) != 1:
1114                     info['title'] += f' #{index}'
1115                 break
1116
1117             return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1118
1119         entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
1120         if not entries:
1121             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1122             if not expanded_url or expanded_url == url:
1123                 raise ExtractorError('No video could be found in this tweet', expected=True)
1124
1125             return self.url_result(expanded_url, display_id=twid, **info)
1126
1127         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1128
1129         if len(entries) == 1:
1130             return entries[0]
1131
1132         for index, entry in enumerate(entries, 1):
1133             entry['title'] += f' #{index}'
1134
1135         return self.playlist_result(entries, **info)
1136
1137
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for videos hosted on amp.twimg.com"""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract the formats and the thumbnail advertised by the page's meta tags.

        The formats come from the VMAP document referenced by the
        `twitter:amplify:vmap` meta tag; the `twitter:player:*` dimensions are
        applied to the first of them.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the `twitter:<target>:width` / `:height` meta tags; either may be None
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an IndexError when the VMAP yielded no formats at all
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1193
1194
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for `i/broadcasts/<id>` URLs"""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream and return the info dict with formats"""
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        playback_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in playback_url:
            self.raise_geo_restricted()

        # The `type` query parameter of the playlist URL (if any) is used as the m3u8 id
        playback_query = compat_parse_qs(compat_urllib_parse_urlparse(playback_url).query)
        stream_type = playback_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            playback_url, broadcast_id, stream_type, state, width, height)
        return info
1231
1232
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for `i/spaces/<id>` URLs (audio spaces)"""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased `state` of a space's metadata to a `live_status` value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL query payload (`variables` and `features`) for space_id"""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Extract the metadata and, when the space's state permits, the audio formats of a space.

        Raises ExtractorError (expected) if the API returns no data for the space.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # None when the state is missing or not one of the known SPACE_STATUS keys
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # `created_at` is a millisecond epoch value (13 digits); scale it down to
            # seconds like the timestamps of the other extractors in this file
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1325
1326
class TwitterShortenerIE(TwitterBaseIE):
    """Resolver for t.co short links (also accepted in the `tco:<id>` form)"""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host `t.co` matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'
    # Prefix of the interstitial warning URL; the real target follows it
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever URL it resolves to"""
        eid, link_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            link_id = eid
            url = self._BASE_URL + link_id
        # NOTE(review): the curl User-Agent presumably makes t.co answer with a plain
        # HTTP redirect - confirm before changing it
        new_url = self._request_webpage(url, link_id, headers={'User-Agent': 'curl'}).geturl()
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Strip only the leading warning prefix, never later occurrences in the target
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)