yt_dlp/extractor/twitter.py

   1 import functools
   2 import json
   3 import random
   4 import re
   5 import urllib.parse
   6
   7 from .common import InfoExtractor
   8 from .periscope import PeriscopeBaseIE, PeriscopeIE
   9 from ..networking.exceptions import HTTPError
  10 from ..utils import (
  11     ExtractorError,
  12     dict_get,
  13     filter_dict,
  14     float_or_none,
  15     format_field,
  16     int_or_none,
  17     make_archive_id,
  18     remove_end,
  19     str_or_none,
  20     strip_or_none,
  21     traverse_obj,
  22     try_call,
  23     try_get,
  24     unified_timestamp,
  25     update_url_query,
  26     url_or_none,
  27     xpath_text,
  28 )
  29
  30
  31 class TwitterBaseIE(InfoExtractor):
  32     _NETRC_MACHINE = 'twitter'
  33     _API_BASE = 'https://api.x.com/1.1/'
  34     _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
  35     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  36     _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
  37     _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
  38     _flow_token = None
  39
  40     _LOGIN_INIT_DATA = json.dumps({
  41         'input_flow_data': {
  42             'flow_context': {
  43                 'debug_overrides': {},
  44                 'start_location': {
  45                     'location': 'unknown',
  46                 },
  47             },
  48         },
  49         'subtask_versions': {
  50             'action_list': 2,
  51             'alert_dialog': 1,
  52             'app_download_cta': 1,
  53             'check_logged_in_account': 1,
  54             'choice_selection': 3,
  55             'contacts_live_sync_permission_prompt': 0,
  56             'cta': 7,
  57             'email_verification': 2,
  58             'end_flow': 1,
  59             'enter_date': 1,
  60             'enter_email': 2,
  61             'enter_password': 5,
  62             'enter_phone': 2,
  63             'enter_recaptcha': 1,
  64             'enter_text': 5,
  65             'enter_username': 2,
  66             'generic_urt': 3,
  67             'in_app_notification': 1,
  68             'interest_picker': 3,
  69             'js_instrumentation': 1,
  70             'menu_dialog': 1,
  71             'notifications_permission_prompt': 2,
  72             'open_account': 2,
  73             'open_home_timeline': 1,
  74             'open_link': 1,
  75             'phone_verification': 4,
  76             'privacy_options': 1,
  77             'security_key': 3,
  78             'select_avatar': 4,
  79             'select_banner': 2,
  80             'settings_list': 7,
  81             'show_code': 1,
  82             'sign_up': 2,
  83             'sign_up_review': 4,
  84             'tweet_selection_urt': 1,
  85             'update_users': 1,
  86             'upload_media': 1,
  87             'user_recommendations_list': 4,
  88             'user_recommendations_urt': 1,
  89             'wait_spinner': 3,
  90             'web_modal': 1,
  91         },
  92     }, separators=(',', ':')).encode()
  93
  94     def _extract_variant_formats(self, variant, video_id):
  95         variant_url = variant.get('url')
  96         if not variant_url:
  97             return [], {}
  98         elif '.m3u8' in variant_url:
  99             fmts, subs = self._extract_m3u8_formats_and_subtitles(
 100                 variant_url, video_id, 'mp4', 'm3u8_native',
 101                 m3u8_id='hls', fatal=False)
 102             for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
 103                 if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
 104                     f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
 105             return fmts, subs
 106         else:
 107             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
 108             f = {
 109                 'url': variant_url,
 110                 'format_id': 'http' + (f'-{tbr}' if tbr else ''),
 111                 'tbr': tbr,
 112             }
 113             self._search_dimensions_in_video_url(f, variant_url)
 114             return [f], {}
 115
 116     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
 117         vmap_url = url_or_none(vmap_url)
 118         if not vmap_url:
 119             return [], {}
 120         vmap_data = self._download_xml(vmap_url, video_id)
 121         formats = []
 122         subtitles = {}
 123         urls = []
 124         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
 125             video_variant.attrib['url'] = urllib.parse.unquote(
 126                 video_variant.attrib['url'])
 127             urls.append(video_variant.attrib['url'])
 128             fmts, subs = self._extract_variant_formats(
 129                 video_variant.attrib, video_id)
 130             formats.extend(fmts)
 131             subtitles = self._merge_subtitles(subtitles, subs)
 132         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
 133         if video_url not in urls:
 134             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
 135             formats.extend(fmts)
 136             subtitles = self._merge_subtitles(subtitles, subs)
 137         return formats, subtitles
 138
 139     @staticmethod
 140     def _search_dimensions_in_video_url(a_format, video_url):
 141         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
 142         if m:
 143             a_format.update({
 144                 'width': int(m.group('width')),
 145                 'height': int(m.group('height')),
 146             })
 147
 148     @property
 149     def is_logged_in(self):
 150         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
 151
 152     # XXX: Temporary workaround until twitter.com => x.com migration is completed
 153     def _real_initialize(self):
 154         if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
 155             return
 156         # User has not yet been migrated to x.com and has passed twitter.com cookies
 157         TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
 158         TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
 159
 160     @functools.cached_property
 161     def _selected_api(self):
 162         return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
 163
 164     def _fetch_guest_token(self, display_id):
 165         guest_token = traverse_obj(self._download_json(
 166             f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
 167             headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
 168             ('guest_token', {str}))
 169         if not guest_token:
 170             raise ExtractorError('Could not retrieve guest token')
 171         return guest_token
 172
 173     def _set_base_headers(self, legacy=False):
 174         bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
 175         return filter_dict({
 176             'Authorization': f'Bearer {bearer_token}',
 177             'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
 178         })
 179
 180     def _call_login_api(self, note, headers, query={}, data=None):
 181         response = self._download_json(
 182             f'{self._API_BASE}onboarding/task.json', None, note,
 183             headers=headers, query=query, data=data, expected_status=400)
 184         error = traverse_obj(response, ('errors', 0, 'message', {str}))
 185         if error:
 186             raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
 187         elif traverse_obj(response, 'status') != 'success':
 188             raise ExtractorError('Login was unsuccessful')
 189
 190         subtask = traverse_obj(
 191             response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
 192         if not subtask:
 193             raise ExtractorError('Twitter API did not return next login subtask')
 194
 195         self._flow_token = response['flow_token']
 196
 197         return subtask
 198
 199     def _perform_login(self, username, password):
 200         if self.is_logged_in:
 201             return
 202
 203         guest_token = self._fetch_guest_token(None)
 204         headers = {
 205             **self._set_base_headers(),
 206             'content-type': 'application/json',
 207             'x-guest-token': guest_token,
 208             'x-twitter-client-language': 'en',
 209             'x-twitter-active-user': 'yes',
 210             'Referer': 'https://x.com/',
 211             'Origin': 'https://x.com',
 212         }
 213
 214         def build_login_json(*subtask_inputs):
 215             return json.dumps({
 216                 'flow_token': self._flow_token,
 217                 'subtask_inputs': subtask_inputs,
 218             }, separators=(',', ':')).encode()
 219
 220         def input_dict(subtask_id, text):
 221             return {
 222                 'subtask_id': subtask_id,
 223                 'enter_text': {
 224                     'text': text,
 225                     'link': 'next_link',
 226                 },
 227             }
 228
 229         next_subtask = self._call_login_api(
 230             'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
 231
 232         while not self.is_logged_in:
 233             if next_subtask == 'LoginJsInstrumentationSubtask':
 234                 next_subtask = self._call_login_api(
 235                     'Submitting JS instrumentation response', headers, data=build_login_json({
 236                         'subtask_id': next_subtask,
 237                         'js_instrumentation': {
 238                             'response': '{}',
 239                             'link': 'next_link',
 240                         },
 241                     }))
 242
 243             elif next_subtask == 'LoginEnterUserIdentifierSSO':
 244                 next_subtask = self._call_login_api(
 245                     'Submitting username', headers, data=build_login_json({
 246                         'subtask_id': next_subtask,
 247                         'settings_list': {
 248                             'setting_responses': [{
 249                                 'key': 'user_identifier',
 250                                 'response_data': {
 251                                     'text_data': {
 252                                         'result': username,
 253                                     },
 254                                 },
 255                             }],
 256                             'link': 'next_link',
 257                         },
 258                     }))
 259
 260             elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
 261                 next_subtask = self._call_login_api(
 262                     'Submitting alternate identifier', headers,
 263                     data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
 264                         'one of username, phone number or email that was not used as --username'))))
 265
 266             elif next_subtask == 'LoginEnterPassword':
 267                 next_subtask = self._call_login_api(
 268                     'Submitting password', headers, data=build_login_json({
 269                         'subtask_id': next_subtask,
 270                         'enter_password': {
 271                             'password': password,
 272                             'link': 'next_link',
 273                         },
 274                     }))
 275
 276             elif next_subtask == 'AccountDuplicationCheck':
 277                 next_subtask = self._call_login_api(
 278                     'Submitting account duplication check', headers, data=build_login_json({
 279                         'subtask_id': next_subtask,
 280                         'check_logged_in_account': {
 281                             'link': 'AccountDuplicationCheck_false',
 282                         },
 283                     }))
 284
 285             elif next_subtask == 'LoginTwoFactorAuthChallenge':
 286                 next_subtask = self._call_login_api(
 287                     'Submitting 2FA token', headers, data=build_login_json(input_dict(
 288                         next_subtask, self._get_tfa_info('two-factor authentication token'))))
 289
 290             elif next_subtask == 'LoginAcid':
 291                 next_subtask = self._call_login_api(
 292                     'Submitting confirmation code', headers, data=build_login_json(input_dict(
 293                         next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
 294
 295             elif next_subtask == 'ArkoseLogin':
 296                 self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
 297
 298             elif next_subtask == 'DenyLoginSubtask':
 299                 self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
 300
 301             elif next_subtask == 'LoginSuccessSubtask':
 302                 raise ExtractorError('Twitter API did not grant auth token cookie')
 303
 304             else:
 305                 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
 306
 307         self.report_login()
 308
 309     def _call_api(self, path, video_id, query={}, graphql=False):
 310         headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
 311         headers.update({
 312             'x-twitter-auth-type': 'OAuth2Session',
 313             'x-twitter-client-language': 'en',
 314             'x-twitter-active-user': 'yes',
 315         } if self.is_logged_in else {
 316             'x-guest-token': self._fetch_guest_token(video_id),
 317         })
 318         allowed_status = {400, 401, 403, 404} if graphql else {403}
 319         result = self._download_json(
 320             (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 321             video_id, headers=headers, query=query, expected_status=allowed_status,
 322             note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 323
 324         if result.get('errors'):
 325             errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 326             if errors and 'not authorized' in errors:
 327                 self.raise_login_required(remove_end(errors, '.'))
 328             raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')
 329
 330         return result
 331
 332     def _build_graphql_query(self, media_id):
 333         raise NotImplementedError('Method must be implemented to support GraphQL')
 334
 335     def _call_graphql_api(self, endpoint, media_id):
 336         data = self._build_graphql_query(media_id)
 337         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 338         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 339
 340
 341 class TwitterCardIE(InfoExtractor):
 342     IE_NAME = 'twitter:card'
 343     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 344     _TESTS = [
 345         {
 346             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 347             # MD5 checksums are different in different places
 348             'info_dict': {
 349                 'id': '560070131976392705',
 350                 'ext': 'mp4',
 351                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 352                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 353                 'uploader': 'Twitter',
 354                 'uploader_id': 'Twitter',
 355                 'thumbnail': r're:^https?://.*\.jpg',
 356                 'duration': 30.033,
 357                 'timestamp': 1422366112,
 358                 'upload_date': '20150127',
 359                 'age_limit': 0,
 360                 'comment_count': int,
 361                 'tags': [],
 362                 'repost_count': int,
 363                 'like_count': int,
 364                 'display_id': '560070183650213889',
 365                 'uploader_url': 'https://twitter.com/Twitter',
 366             },
 367         },
 368         {
 369             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 370             'md5': '7137eca597f72b9abbe61e5ae0161399',
 371             'info_dict': {
 372                 'id': '623160978427936768',
 373                 'ext': 'mp4',
 374                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 375                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 376                 'uploader': 'NASA',
 377                 'uploader_id': 'NASA',
 378                 'timestamp': 1437408129,
 379                 'upload_date': '20150720',
 380                 'uploader_url': 'https://twitter.com/NASA',
 381                 'age_limit': 0,
 382                 'comment_count': int,
 383                 'like_count': int,
 384                 'repost_count': int,
 385                 'tags': ['PlutoFlyby'],
 386             },
 387             'params': {'format': '[protocol=https]'},
 388         },
 389         {
 390             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 391             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 392             'info_dict': {
 393                 'id': 'dq4Oj5quskI',
 394                 'ext': 'mp4',
 395                 'title': 'Ubuntu 11.10 Overview',
 396                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 397                 'upload_date': '20111013',
 398                 'uploader': 'OMG! UBUNTU!',
 399                 'uploader_id': 'omgubuntu',
 400                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 401                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 402                 'channel_follower_count': int,
 403                 'chapters': 'count:8',
 404                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 405                 'duration': 138,
 406                 'categories': ['Film & Animation'],
 407                 'age_limit': 0,
 408                 'comment_count': int,
 409                 'availability': 'public',
 410                 'like_count': int,
 411                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 412                 'view_count': int,
 413                 'tags': 'count:12',
 414                 'channel': 'OMG! UBUNTU!',
 415                 'playable_in_embed': True,
 416             },
 417             'add_ie': ['Youtube'],
 418         },
 419         {
 420             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 421             'info_dict': {
 422                 'id': 'iBb2x00UVlv',
 423                 'ext': 'mp4',
 424                 'upload_date': '20151113',
 425                 'uploader_id': '1189339351084113920',
 426                 'uploader': 'ArsenalTerje',
 427                 'title': 'Vine by ArsenalTerje',
 428                 'timestamp': 1447451307,
 429                 'alt_title': 'Vine by ArsenalTerje',
 430                 'comment_count': int,
 431                 'like_count': int,
 432                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 433                 'view_count': int,
 434                 'repost_count': int,
 435             },
 436             'add_ie': ['Vine'],
 437             'params': {'skip_download': 'm3u8'},
 438         },
 439         {
 440             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 441             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 442             'info_dict': {
 443                 'id': '705235433198714880',
 444                 'ext': 'mp4',
 445                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 446                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 447                 'uploader': 'Brent Yarina',
 448                 'uploader_id': 'BTNBrentYarina',
 449                 'timestamp': 1456976204,
 450                 'upload_date': '20160303',
 451             },
 452             'skip': 'This content is no longer available.',
 453         },
 454         {
 455             'url': 'https://twitter.com/i/videos/752274308186120192',
 456             'only_matching': True,
 457         },
 458     ]
 459
 460     def _real_extract(self, url):
 461         status_id = self._match_id(url)
 462         return self.url_result(
 463             'https://twitter.com/statuses/' + status_id,
 464             TwitterIE.ie_key(), status_id)
 465
 466
 467 class TwitterIE(TwitterBaseIE):
 468     IE_NAME = 'twitter'
 469     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 470
 471     _TESTS = [{
 472         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 473         'info_dict': {
 474             'id': '643211870443208704',
 475             'display_id': '643211948184596480',
 476             'ext': 'mp4',
 477             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 478             'thumbnail': r're:^https?://.*\.jpg',
 479             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 480             'channel_id': '549749560',
 481             'uploader': 'FREE THE NIPPLE',
 482             'uploader_id': 'freethenipple',
 483             'duration': 12.922,
 484             'timestamp': 1442188653,
 485             'upload_date': '20150913',
 486             'uploader_url': 'https://twitter.com/freethenipple',
 487             'comment_count': int,
 488             'repost_count': int,
 489             'like_count': int,
 490             'tags': [],
 491             'age_limit': 18,
 492             '_old_archive_ids': ['twitter 643211948184596480'],
 493         },
 494         'skip': 'Requires authentication',
 495     }, {
 496         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 497         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 498         'info_dict': {
 499             'id': '657991469417025536',
 500             'ext': 'mp4',
 501             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 502             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 503             'thumbnail': r're:^https?://.*\.png',
 504             'uploader': 'Gifs',
 505             'uploader_id': 'giphz',
 506         },
 507         'expected_warnings': ['height', 'width'],
 508         'skip': 'Account suspended',
 509     }, {
 510         'url': 'https://twitter.com/starwars/status/665052190608723968',
 511         'info_dict': {
 512             'id': '665052190608723968',
 513             'display_id': '665052190608723968',
 514             'ext': 'mp4',
 515             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 516             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 517             'channel_id': '20106852',
 518             'uploader_id': 'starwars',
 519             'uploader': r're:Star Wars.*',
 520             'timestamp': 1447395772,
 521             'upload_date': '20151113',
 522             'uploader_url': 'https://twitter.com/starwars',
 523             'comment_count': int,
 524             'repost_count': int,
 525             'like_count': int,
 526             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 527             'age_limit': 0,
 528             '_old_archive_ids': ['twitter 665052190608723968'],
 529         },
 530     }, {
 531         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 532         'info_dict': {
 533             'id': '705235433198714880',
 534             'ext': 'mp4',
 535             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 536             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 537             'uploader_id': 'BTNBrentYarina',
 538             'uploader': 'Brent Yarina',
 539             'timestamp': 1456976204,
 540             'upload_date': '20160303',
 541             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 542             'comment_count': int,
 543             'repost_count': int,
 544             'like_count': int,
 545             'tags': [],
 546             'age_limit': 0,
 547         },
 548         'params': {
 549             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 550             # Test case of TwitterCardIE
 551             'skip_download': True,
 552         },
 553         'skip': 'Dead external link',
 554     }, {
 555         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 556         'info_dict': {
 557             'id': '700207414000242688',
 558             'display_id': '700207533655363584',
 559             'ext': 'mp4',
 560             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 561             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 562             'thumbnail': r're:^https?://.*\.jpg',
 563             'channel_id': '1383165541',
 564             'uploader': 'jaydin donte geer',
 565             'uploader_id': 'jaydingeer',
 566             'duration': 30.0,
 567             'timestamp': 1455777459,
 568             'upload_date': '20160218',
 569             'uploader_url': 'https://twitter.com/jaydingeer',
 570             'comment_count': int,
 571             'repost_count': int,
 572             'like_count': int,
 573             'tags': ['Damndaniel'],
 574             'age_limit': 0,
 575             '_old_archive_ids': ['twitter 700207533655363584'],
 576         },
 577     }, {
 578         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 579         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 580         'info_dict': {
 581             'id': 'MIOxnrUteUd',
 582             'ext': 'mp4',
 583             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 584             'uploader': 'TAKUMA',
 585             'uploader_id': '1004126642786242560',
 586             'timestamp': 1402826626,
 587             'upload_date': '20140615',
 588             'thumbnail': r're:^https?://.*\.jpg',
 589             'alt_title': 'Vine by TAKUMA',
 590             'comment_count': int,
 591             'repost_count': int,
 592             'like_count': int,
 593             'view_count': int,
 594         },
 595         'add_ie': ['Vine'],
 596     }, {
 597         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 598         'info_dict': {
 599             'id': '717462543795523584',
 600             'display_id': '719944021058060289',
 601             'ext': 'mp4',
 602             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 603             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 604             'channel_id': '701615052',
 605             'uploader_id': 'CaptainAmerica',
 606             'uploader': 'Captain America',
 607             'duration': 3.17,
 608             'timestamp': 1460483005,
 609             'upload_date': '20160412',
 610             'uploader_url': 'https://twitter.com/CaptainAmerica',
 611             'thumbnail': r're:^https?://.*\.jpg',
 612             'comment_count': int,
 613             'repost_count': int,
 614             'like_count': int,
 615             'tags': [],
 616             'age_limit': 0,
 617             '_old_archive_ids': ['twitter 719944021058060289'],
 618         },
 619     }, {
 620         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 621         'info_dict': {
 622             'id': '1zqKVVlkqLaKB',
 623             'ext': 'mp4',
 624             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 625             'upload_date': '20160923',
 626             'uploader_id': '1PmKqpJdOJQoY',
 627             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 628             'timestamp': 1474613214,
 629             'thumbnail': r're:^https?://.*\.jpg',
 630         },
 631         'add_ie': ['Periscope'],
 632         'skip': 'Broadcast not found',
 633     }, {
 634         # has mp4 formats via mobile API
 635         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 636         'info_dict': {
 637             'id': '852077943283097602',
 638             'ext': 'mp4',
 639             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 640             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 641             'channel_id': '2526757026',
 642             'uploader': 'عالم الأخبار',
 643             'uploader_id': 'news_al3alm',
 644             'duration': 277.4,
 645             'timestamp': 1492000653,
 646             'upload_date': '20170412',
 647             'display_id': '852138619213144067',
 648             'age_limit': 0,
 649             'uploader_url': 'https://twitter.com/news_al3alm',
 650             'thumbnail': r're:^https?://.*\.jpg',
 651             'tags': [],
 652             'repost_count': int,
 653             'like_count': int,
 654             'comment_count': int,
 655             '_old_archive_ids': ['twitter 852138619213144067'],
 656         },
 657     }, {
 658         'url': 'https://twitter.com/i/web/status/910031516746514432',
 659         'info_dict': {
 660             'id': '910030238373089285',
 661             'display_id': '910031516746514432',
 662             'ext': 'mp4',
 663             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 664             'thumbnail': r're:^https?://.*\.jpg',
 665             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 666             'channel_id': '2319432498',
 667             'uploader': 'Préfet de Guadeloupe',
 668             'uploader_id': 'Prefet971',
 669             'duration': 47.48,
 670             'timestamp': 1505803395,
 671             'upload_date': '20170919',
 672             'uploader_url': 'https://twitter.com/Prefet971',
 673             'comment_count': int,
 674             'repost_count': int,
 675             'like_count': int,
 676             'tags': ['Maria'],
 677             'age_limit': 0,
 678             '_old_archive_ids': ['twitter 910031516746514432'],
 679         },
 680         'params': {
 681             'skip_download': True,  # requires ffmpeg
 682         },
 683     }, {
 684         # card via api.twitter.com/1.1/videos/tweet/config
 685         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 686         'info_dict': {
 687             'id': '1001551417340022785',
 688             'display_id': '1001551623938805763',
 689             'ext': 'mp4',
 690             'title': 're:.*?Shep is on a roll today.*?',
 691             'thumbnail': r're:^https?://.*\.jpg',
 692             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 693             'channel_id': '255036353',
 694             'uploader': 'Lis Power',
 695             'uploader_id': 'LisPower1',
 696             'duration': 111.278,
 697             'timestamp': 1527623489,
 698             'upload_date': '20180529',
 699             'uploader_url': 'https://twitter.com/LisPower1',
 700             'comment_count': int,
 701             'repost_count': int,
 702             'like_count': int,
 703             'tags': [],
 704             'age_limit': 0,
 705             '_old_archive_ids': ['twitter 1001551623938805763'],
 706         },
 707         'params': {
 708             'skip_download': True,  # requires ffmpeg
 709         },
 710     }, {
 711         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 712         'info_dict': {
 713             'id': '1087791272830607360',
 714             'display_id': '1087791357756956680',
 715             'ext': 'mp4',
 716             'title': 'X - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 717             'thumbnail': r're:^https?://.*\.jpg',
 718             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 719             'uploader': 'X',
 720             'uploader_id': 'X',
 721             'duration': 61.567,
 722             'timestamp': 1548184644,
 723             'upload_date': '20190122',
 724             'uploader_url': 'https://twitter.com/X',
 725             'comment_count': int,
 726             'repost_count': int,
 727             'like_count': int,
 728             'view_count': int,
 729             'tags': [],
 730             'age_limit': 0,
 731         },
 732         'skip': 'This Tweet is unavailable',
 733     }, {
 734         # not available in Periscope
 735         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 736         'info_dict': {
 737             'id': '1vOGwqejwoWxB',
 738             'ext': 'mp4',
 739             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 740             'uploader': 'Vivi',
 741             'uploader_id': '1eVjYOLGkGrQL',
 742             'thumbnail': r're:^https?://.*\.jpg',
 743             'tags': ['EduTECH2019'],
 744             'view_count': int,
 745         },
 746         'add_ie': ['TwitterBroadcast'],
 747         'skip': 'Broadcast no longer exists',
 748     }, {
 749         # unified card
 750         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 751         'info_dict': {
 752             'id': '1349774757969989634',
 753             'display_id': '1349794411333394432',
 754             'ext': 'mp4',
 755             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 756             'thumbnail': r're:^https?://.*\.jpg',
 757             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 758             'channel_id': '18552281',
 759             'uploader': 'Brooklyn Nets',
 760             'uploader_id': 'BrooklynNets',
 761             'duration': 324.484,
 762             'timestamp': 1610651040,
 763             'upload_date': '20210114',
 764             'uploader_url': 'https://twitter.com/BrooklynNets',
 765             'comment_count': int,
 766             'repost_count': int,
 767             'like_count': int,
 768             'tags': [],
 769             'age_limit': 0,
 770             '_old_archive_ids': ['twitter 1349794411333394432'],
 771         },
 772         'params': {
 773             'skip_download': True,
 774         },
 775     }, {
 776         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 777         'info_dict': {
 778             'id': '1577855447914409984',
 779             'display_id': '1577855540407197696',
 780             'ext': 'mp4',
 781             'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
 782             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 783             'upload_date': '20221006',
 784             'channel_id': '143077138',
 785             'uploader': 'Oshtru',
 786             'uploader_id': 'oshtru',
 787             'uploader_url': 'https://twitter.com/oshtru',
 788             'thumbnail': r're:^https?://.*\.jpg',
 789             'duration': 30.03,
 790             'timestamp': 1665025050,
 791             'comment_count': int,
 792             'repost_count': int,
 793             'like_count': int,
 794             'tags': [],
 795             'age_limit': 0,
 796             '_old_archive_ids': ['twitter 1577855540407197696'],
 797         },
 798         'params': {'skip_download': True},
 799     }, {
 800         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 801         'info_dict': {
 802             'id': '1577719286659006464',
 803             'title': 'Ultima Reload - Test',
 804             'description': 'Test https://t.co/Y3KEZD7Dad',
 805             'channel_id': '168922496',
 806             'uploader': 'Ultima Reload',
 807             'uploader_id': 'UltimaShadowX',
 808             'uploader_url': 'https://twitter.com/UltimaShadowX',
 809             'upload_date': '20221005',
 810             'timestamp': 1664992565,
 811             'comment_count': int,
 812             'repost_count': int,
 813             'like_count': int,
 814             'tags': [],
 815             'age_limit': 0,
 816         },
 817         'playlist_count': 4,
 818         'params': {'skip_download': True},
 819     }, {
 820         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 821         'info_dict': {
 822             'id': '1575559336759263233',
 823             'display_id': '1575560063510810624',
 824             'ext': 'mp4',
 825             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 826             'thumbnail': r're:^https?://.*\.jpg',
 827             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 828             'channel_id': '1094109584',
 829             'uploader': 'Max Olson',
 830             'uploader_id': 'MesoMax919',
 831             'uploader_url': 'https://twitter.com/MesoMax919',
 832             'duration': 21.321,
 833             'timestamp': 1664477766,
 834             'upload_date': '20220929',
 835             'comment_count': int,
 836             'repost_count': int,
 837             'like_count': int,
 838             'tags': ['HurricaneIan'],
 839             'age_limit': 0,
 840             '_old_archive_ids': ['twitter 1575560063510810624'],
 841         },
 842     }, {
 843         # Adult content, fails if not logged in
 844         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 845         'info_dict': {
 846             'id': '1575199163847000068',
 847             'display_id': '1575199173472927762',
 848             'ext': 'mp4',
 849             'title': str,
 850             'description': str,
 851             'channel_id': '1217167793541480450',
 852             'uploader': str,
 853             'uploader_id': 'Rizdraws',
 854             'uploader_url': 'https://twitter.com/Rizdraws',
 855             'upload_date': '20220928',
 856             'timestamp': 1664391723,
 857             'thumbnail': r're:^https?://.+\.jpg',
 858             'like_count': int,
 859             'repost_count': int,
 860             'comment_count': int,
 861             'age_limit': 18,
 862             'tags': [],
 863             '_old_archive_ids': ['twitter 1575199173472927762'],
 864         },
 865         'params': {'skip_download': 'The media could not be played'},
 866         'skip': 'Requires authentication',
 867     }, {
 868         # Playlist result only with graphql API
 869         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 870         'playlist_mincount': 2,
 871         'info_dict': {
 872             'id': '1395079556562706435',
 873             'title': str,
 874             'tags': [],
 875             'channel_id': '21539378',
 876             'uploader': str,
 877             'like_count': int,
 878             'upload_date': '20210519',
 879             'age_limit': 0,
 880             'repost_count': int,
 881             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 882             'uploader_id': 'Srirachachau',
 883             'comment_count': int,
 884             'uploader_url': 'https://twitter.com/Srirachachau',
 885             'timestamp': 1621447860,
 886         },
 887     }, {
 888         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 889         'playlist_mincount': 2,
 890         'info_dict': {
 891             'id': '1578353380363501568',
 892             'title': str,
 893             'channel_id': '2195866214',
 894             'uploader_id': 'DavidToons_',
 895             'repost_count': int,
 896             'like_count': int,
 897             'uploader': str,
 898             'timestamp': 1665143744,
 899             'uploader_url': 'https://twitter.com/DavidToons_',
 900             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 901             'tags': [],
 902             'comment_count': int,
 903             'upload_date': '20221007',
 904             'age_limit': 0,
 905         },
 906     }, {
 907         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 908         'playlist_count': 2,
 909         'info_dict': {
 910             'id': '1578401165338976258',
 911             'title': str,
 912             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 913             'channel_id': '19338359',
 914             'uploader': str,
 915             'uploader_id': 'primevideouk',
 916             'timestamp': 1665155137,
 917             'upload_date': '20221007',
 918             'age_limit': 0,
 919             'uploader_url': 'https://twitter.com/primevideouk',
 920             'comment_count': int,
 921             'repost_count': int,
 922             'like_count': int,
 923             'tags': ['TheRingsOfPower'],
 924         },
 925     }, {
 926         # Twitter Spaces
 927         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 928         'info_dict': {
 929             'id': '1lPJqmBeeNAJb',
 930             'ext': 'm4a',
 931             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 932             'uploader': r're:Monique Camarra.+?',
 933             'uploader_id': 'MoniqueCamarra',
 934             'live_status': 'was_live',
 935             'release_timestamp': 1658417414,
 936             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 937             'timestamp': 1658407771,
 938             'release_date': '20220721',
 939             'upload_date': '20220721',
 940         },
 941         'add_ie': ['TwitterSpaces'],
 942         'params': {'skip_download': 'm3u8'},
 943         'skip': 'Requires authentication',
 944     }, {
 945         # URL specifies video number but --yes-playlist
 946         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 947         'playlist_mincount': 2,
 948         'info_dict': {
 949             'id': '1600649710662213632',
 950             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 951             'timestamp': 1670459604.0,
 952             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 953             'comment_count': int,
 954             'uploader_id': 'CTVJLaidlaw',
 955             'channel_id': '80082014',
 956             'repost_count': int,
 957             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 958             'upload_date': '20221208',
 959             'age_limit': 0,
 960             'uploader': 'Jocelyn Laidlaw',
 961             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 962             'like_count': int,
 963         },
 964     }, {
 965         # URL specifies video number and --no-playlist
 966         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 967         'info_dict': {
 968             'id': '1600649511827013632',
 969             'ext': 'mp4',
 970             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 971             'thumbnail': r're:^https?://.+\.jpg',
 972             'timestamp': 1670459604.0,
 973             'channel_id': '80082014',
 974             'uploader_id': 'CTVJLaidlaw',
 975             'uploader': 'Jocelyn Laidlaw',
 976             'repost_count': int,
 977             'comment_count': int,
 978             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 979             'duration': 102.226,
 980             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 981             'display_id': '1600649710662213632',
 982             'like_count': int,
 983             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 984             'upload_date': '20221208',
 985             'age_limit': 0,
 986             '_old_archive_ids': ['twitter 1600649710662213632'],
 987         },
 988         'params': {'noplaylist': True},
 989     }, {
 990         # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
 991         # note the id different between extraction and url
 992         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 993         'info_dict': {
 994             'id': '1621117577354424321',
 995             'display_id': '1621117700482416640',
 996             'ext': 'mp4',
 997             'title': '뽀 - 아 최우제 이동속도 봐',
 998             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 999             'duration': 24.598,
1000             'channel_id': '1281839411068432384',
1001             'uploader': '뽀',
1002             'uploader_id': 's2FAKER',
1003             'uploader_url': 'https://twitter.com/s2FAKER',
1004             'upload_date': '20230202',
1005             'timestamp': 1675339553.0,
1006             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1007             'age_limit': 18,
1008             'tags': [],
1009             'like_count': int,
1010             'repost_count': int,
1011             'comment_count': int,
1012             '_old_archive_ids': ['twitter 1621117700482416640'],
1013         },
1014         'skip': 'Requires authentication',
1015     }, {
1016         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1017         'info_dict': {
1018             'id': '1599108643743473680',
1019             'display_id': '1599108751385972737',
1020             'ext': 'mp4',
1021             'title': '\u06ea - \U0001F48B',
1022             'channel_id': '1347791436809441283',
1023             'uploader_url': 'https://twitter.com/hlo_again',
1024             'like_count': int,
1025             'uploader_id': 'hlo_again',
1026             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1027             'repost_count': int,
1028             'duration': 9.531,
1029             'comment_count': int,
1030             'upload_date': '20221203',
1031             'age_limit': 0,
1032             'timestamp': 1670092210.0,
1033             'tags': [],
1034             'uploader': '\u06ea',
1035             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1036             '_old_archive_ids': ['twitter 1599108751385972737'],
1037         },
1038         'params': {'noplaylist': True},
1039     }, {
1040         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1041         'info_dict': {
1042             'id': '1600009362759733248',
1043             'display_id': '1600009574919962625',
1044             'ext': 'mp4',
1045             'channel_id': '211814412',
1046             'uploader_url': 'https://twitter.com/MunTheShinobi',
1047             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1048             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1049             'age_limit': 0,
1050             'uploader': 'Mün',
1051             'repost_count': int,
1052             'upload_date': '20221206',
1053             'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1054             'comment_count': int,
1055             'like_count': int,
1056             'tags': [],
1057             'uploader_id': 'MunTheShinobi',
1058             'duration': 139.987,
1059             'timestamp': 1670306984.0,
1060             '_old_archive_ids': ['twitter 1600009574919962625'],
1061         },
1062     }, {
1063         # retweeted_status (private)
1064         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1065         'info_dict': {
1066             'id': '1623274794488659969',
1067             'display_id': '1623739803874349067',
1068             'ext': 'mp4',
1069             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
1070             'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1071             'uploader': 'Johnny Bullets',
1072             'uploader_id': 'Johnnybull3ts',
1073             'uploader_url': 'https://twitter.com/Johnnybull3ts',
1074             'age_limit': 0,
1075             'tags': [],
1076             'duration': 8.033,
1077             'timestamp': 1675853859.0,
1078             'upload_date': '20230208',
1079             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1080             'like_count': int,
1081             'repost_count': int,
1082         },
1083         'skip': 'Protected tweet',
1084     }, {
1085         # retweeted_status
1086         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1087         'info_dict': {
1088             'id': '1694928337846538240',
1089             'ext': 'mp4',
1090             'display_id': '1695424220702888009',
1091             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1092             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1093             'channel_id': '15212187',
1094             'uploader': 'Benny Johnson',
1095             'uploader_id': 'bennyjohnson',
1096             'uploader_url': 'https://twitter.com/bennyjohnson',
1097             'age_limit': 0,
1098             'tags': [],
1099             'duration': 45.001,
1100             'timestamp': 1692962814.0,
1101             'upload_date': '20230825',
1102             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1103             'like_count': int,
1104             'repost_count': int,
1105             'comment_count': int,
1106             '_old_archive_ids': ['twitter 1695424220702888009'],
1107         },
1108     }, {
1109         # retweeted_status w/ legacy API
1110         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1111         'info_dict': {
1112             'id': '1694928337846538240',
1113             'ext': 'mp4',
1114             'display_id': '1695424220702888009',
1115             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1116             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1117             'channel_id': '15212187',
1118             'uploader': 'Benny Johnson',
1119             'uploader_id': 'bennyjohnson',
1120             'uploader_url': 'https://twitter.com/bennyjohnson',
1121             'age_limit': 0,
1122             'tags': [],
1123             'duration': 45.001,
1124             'timestamp': 1692962814.0,
1125             'upload_date': '20230825',
1126             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1127             'like_count': int,
1128             'repost_count': int,
1129             '_old_archive_ids': ['twitter 1695424220702888009'],
1130         },
1131         'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1132     }, {
1133         # Broadcast embedded in tweet
1134         'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1135         'info_dict': {
1136             'id': '1rmxPMjLzAXKN',
1137             'ext': 'mp4',
1138             'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1139             'uploader': 'Jessica Dobson',
1140             'uploader_id': 'JessicaDobsonWX',
1141             'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1142             'timestamp': 1701566398,
1143             'upload_date': '20231203',
1144             'live_status': 'was_live',
1145             'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1146             'concurrent_view_count': int,
1147             'view_count': int,
1148         },
1149         'add_ie': ['TwitterBroadcast'],
1150     }, {
1151         # Animated gif and quote tweet video
1152         'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1153         'playlist_mincount': 2,
1154         'info_dict': {
1155             'id': '1696256659889565950',
1156             'title': 'BAKOON - https://t.co/zom968d0a0',
1157             'description': 'https://t.co/zom968d0a0',
1158             'tags': [],
1159             'channel_id': '1263540390',
1160             'uploader': 'BAKOON',
1161             'uploader_id': 'BAKKOOONN',
1162             'uploader_url': 'https://twitter.com/BAKKOOONN',
1163             'age_limit': 18,
1164             'timestamp': 1693254077.0,
1165             'upload_date': '20230828',
1166             'like_count': int,
1167             'comment_count': int,
1168             'repost_count': int,
1169         },
1170         'skip': 'Requires authentication',
1171     }, {
1172         # "stale tweet" with typename "TweetWithVisibilityResults"
1173         'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1174         'md5': '511377ff8dfa7545307084dca4dce319',
1175         'info_dict': {
1176             'id': '1724883339285544960',
1177             'ext': 'mp4',
1178             'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1179             'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1180             'display_id': '1724884212803834154',
1181             'channel_id': '337808606',
1182             'uploader': 'Robert F. Kennedy Jr',
1183             'uploader_id': 'RobertKennedyJr',
1184             'uploader_url': 'https://twitter.com/RobertKennedyJr',
1185             'upload_date': '20231115',
1186             'timestamp': 1700079417.0,
1187             'duration': 341.048,
1188             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1189             'tags': ['Kennedy24'],
1190             'repost_count': int,
1191             'like_count': int,
1192             'comment_count': int,
1193             'age_limit': 0,
1194             '_old_archive_ids': ['twitter 1724884212803834154'],
1195         },
1196     }, {
1197         # x.com
1198         'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1199         'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1200         'info_dict': {
1201             'id': '1790637589910654976',
1202             'ext': 'mp4',
1203             'title': 'Historic Vids - One of the most intense moments in history',
1204             'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1205             'display_id': '1790637656616943991',
1206             'uploader': 'Historic Vids',
1207             'uploader_id': 'historyinmemes',
1208             'uploader_url': 'https://twitter.com/historyinmemes',
1209             'channel_id': '855481986290524160',
1210             'upload_date': '20240515',
1211             'timestamp': 1715756260.0,
1212             'duration': 15.488,
1213             'tags': [],
1214             'comment_count': int,
1215             'repost_count': int,
1216             'like_count': int,
1217             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1218             'age_limit': 0,
1219             '_old_archive_ids': ['twitter 1790637656616943991'],
1220         },
1221     }, {
1222         # onion route
1223         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1224         'only_matching': True,
1225     }, {
1226         # Twitch Clip Embed
1227         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1228         'only_matching': True,
1229     }, {
1230         # promo_video_website card
1231         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1232         'only_matching': True,
1233     }, {
1234         # promo_video_convo card
1235         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1236         'only_matching': True,
1237     }, {
1238         # appplayer card
1239         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1240         'only_matching': True,
1241     }, {
1242         # video_direct_message card
1243         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1244         'only_matching': True,
1245     }, {
1246         # poll2choice_video card
1247         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1248         'only_matching': True,
1249     }, {
1250         # poll3choice_video card
1251         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1252         'only_matching': True,
1253     }, {
1254         # poll4choice_video card
1255         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1256         'only_matching': True,
1257     }]
1258
    # Pre-compiled pattern: captures the run of digits that sits between a
    # literal `_video/` and the next `/`.
    # NOTE(review): presumably applied to media/video URLs to recover the media id -- confirm at call sites
    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1260
1261     @property
1262     def _GRAPHQL_ENDPOINT(self):
1263         if self.is_logged_in:
1264             return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1265         return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1266
1267     def _graphql_to_legacy(self, data, twid):
1268         result = traverse_obj(data, (
1269             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1270             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
1271             'tweet_results', 'result', ('tweet', None), {dict},
1272         ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
1273             data, ('tweetResult', 'result', {dict}), default={})
1274
1275         typename = result.get('__typename')
1276         if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
1277             self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
1278
1279         if 'tombstone' in result:
1280             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
1281             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
1282         elif typename == 'TweetUnavailable':
1283             reason = result.get('reason')
1284             if reason == 'NsfwLoggedOut':
1285                 self.raise_login_required('NSFW tweet requires authentication')
1286             elif reason == 'Protected':
1287                 self.raise_login_required('You are not authorized to view this protected tweet')
1288             raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
1289         # Result for "stale tweet" needs additional transformation
1290         elif typename == 'TweetWithVisibilityResults':
1291             result = traverse_obj(result, ('tweet', {dict})) or {}
1292
1293         status = result.get('legacy', {})
1294         status.update(traverse_obj(result, {
1295             'user': ('core', 'user_results', 'result', 'legacy'),
1296             'card': ('card', 'legacy'),
1297             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1298             'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
1299         }, expected_type=dict, default={}))
1300
1301         # extra transformations needed since result does not match legacy format
1302         if status.get('retweeted_status'):
1303             status['retweeted_status']['user'] = traverse_obj(status, (
1304                 'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
1305
1306         binding_values = {
1307             binding_value.get('key'): binding_value.get('value')
1308             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
1309         }
1310         if binding_values:
1311             status['card']['binding_values'] = binding_values
1312
1313         return status
1314
1315     def _build_graphql_query(self, media_id):
1316         return {
1317             'variables': {
1318                 'focalTweetId': media_id,
1319                 'includePromotedContent': True,
1320                 'with_rux_injections': False,
1321                 'withBirdwatchNotes': True,
1322                 'withCommunity': True,
1323                 'withDownvotePerspective': False,
1324                 'withQuickPromoteEligibilityTweetFields': True,
1325                 'withReactionsMetadata': False,
1326                 'withReactionsPerspective': False,
1327                 'withSuperFollowsTweetFields': True,
1328                 'withSuperFollowsUserFields': True,
1329                 'withV2Timeline': True,
1330                 'withVoice': True,
1331             },
1332             'features': {
1333                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1334                 'interactive_text_enabled': True,
1335                 'responsive_web_edit_tweet_api_enabled': True,
1336                 'responsive_web_enhance_cards_enabled': True,
1337                 'responsive_web_graphql_timeline_navigation_enabled': False,
1338                 'responsive_web_text_conversations_enabled': False,
1339                 'responsive_web_uc_gql_enabled': True,
1340                 'standardized_nudges_misinfo': True,
1341                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1342                 'tweetypie_unmention_optimization_enabled': True,
1343                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1344                 'verified_phone_label_enabled': False,
1345                 'vibe_api_enabled': True,
1346             },
1347         } if self.is_logged_in else {
1348             'variables': {
1349                 'tweetId': media_id,
1350                 'withCommunity': False,
1351                 'includePromotedContent': False,
1352                 'withVoice': False,
1353             },
1354             'features': {
1355                 'creator_subscriptions_tweet_preview_api_enabled': True,
1356                 'tweetypie_unmention_optimization_enabled': True,
1357                 'responsive_web_edit_tweet_api_enabled': True,
1358                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1359                 'view_counts_everywhere_api_enabled': True,
1360                 'longform_notetweets_consumption_enabled': True,
1361                 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1362                 'tweet_awards_web_tipping_enabled': False,
1363                 'freedom_of_speech_not_reach_fetch_enabled': True,
1364                 'standardized_nudges_misinfo': True,
1365                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1366                 'longform_notetweets_rich_text_read_enabled': True,
1367                 'longform_notetweets_inline_media_enabled': True,
1368                 'responsive_web_graphql_exclude_directive_enabled': True,
1369                 'verified_phone_label_enabled': False,
1370                 'responsive_web_media_download_video_enabled': False,
1371                 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1372                 'responsive_web_graphql_timeline_navigation_enabled': True,
1373                 'responsive_web_enhance_cards_enabled': False,
1374             },
1375             'fieldToggles': {
1376                 'withArticleRichContentState': False,
1377             },
1378         }
1379
1380     def _call_syndication_api(self, twid):
1381         self.report_warning(
1382             'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
1383         status = self._download_json(
1384             'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
1385             headers={'User-Agent': 'Googlebot'}, query={
1386                 'id': twid,
1387                 # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
1388                 'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
1389             })
1390         if not status:
1391             raise ExtractorError('Syndication endpoint returned empty JSON response')
1392         # Transform the result so its structure matches that of legacy/graphql
1393         media = []
1394         for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
1395             detail['id_str'] = traverse_obj(detail, (
1396                 'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
1397             media.append(detail)
1398         status['extended_entities'] = {'media': media}
1399
1400         return status
1401
1402     def _extract_status(self, twid):
1403         if self._selected_api not in ('graphql', 'legacy', 'syndication'):
1404             raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
1405
1406         try:
1407             if self.is_logged_in or self._selected_api == 'graphql':
1408                 status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
1409             elif self._selected_api == 'legacy':
1410                 status = self._call_api(f'statuses/show/{twid}.json', twid, {
1411                     'cards_platform': 'Web-12',
1412                     'include_cards': 1,
1413                     'include_reply_count': 1,
1414                     'include_user_entities': 0,
1415                     'tweet_mode': 'extended',
1416                 })
1417         except ExtractorError as e:
1418             if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
1419                 raise
1420             self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
1421             status = self._call_syndication_api(twid)
1422
1423         if self._selected_api == 'syndication':
1424             status = self._call_syndication_api(twid)
1425
1426         return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1427
1428     def _real_extract(self, url):
1429         twid, selected_index = self._match_valid_url(url).group('id', 'index')
1430         status = self._extract_status(twid)
1431
1432         title = description = traverse_obj(
1433             status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
1434         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
1435         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
1436         user = status.get('user') or {}
1437         uploader = user.get('name')
1438         if uploader:
1439             title = f'{uploader} - {title}'
1440         uploader_id = user.get('screen_name')
1441
1442         info = {
1443             'id': twid,
1444             'title': title,
1445             'description': description,
1446             'uploader': uploader,
1447             'timestamp': unified_timestamp(status.get('created_at')),
1448             'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
1449             'uploader_id': uploader_id,
1450             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
1451             'like_count': int_or_none(status.get('favorite_count')),
1452             'repost_count': int_or_none(status.get('retweet_count')),
1453             'comment_count': int_or_none(status.get('reply_count')),
1454             'age_limit': 18 if status.get('possibly_sensitive') else 0,
1455             'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
1456         }
1457
1458         def extract_from_video_info(media):
1459             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
1460             self.write_debug(f'Extracting from video info: {media_id}')
1461
1462             formats = []
1463             subtitles = {}
1464             for variant in traverse_obj(media, ('video_info', 'variants', ...)):
1465                 fmts, subs = self._extract_variant_formats(variant, twid)
1466                 subtitles = self._merge_subtitles(subtitles, subs)
1467                 formats.extend(fmts)
1468
1469             thumbnails = []
1470             media_url = media.get('media_url_https') or media.get('media_url')
1471             if media_url:
1472                 def add_thumbnail(name, size):
1473                     thumbnails.append({
1474                         'id': name,
1475                         'url': update_url_query(media_url, {'name': name}),
1476                         'width': int_or_none(size.get('w') or size.get('width')),
1477                         'height': int_or_none(size.get('h') or size.get('height')),
1478                     })
1479                 for name, size in media.get('sizes', {}).items():
1480                     add_thumbnail(name, size)
1481                 add_thumbnail('orig', media.get('original_info') or {})
1482
1483             return {
1484                 'id': media_id,
1485                 'formats': formats,
1486                 'subtitles': subtitles,
1487                 'thumbnails': thumbnails,
1488                 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
1489                 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
1490                 # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
1491                 '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
1492             }
1493
1494         def extract_from_card_info(card):
1495             if not card:
1496                 return
1497
1498             self.write_debug(f'Extracting from card info: {card.get("url")}')
1499             binding_values = card['binding_values']
1500
1501             def get_binding_value(k):
1502                 o = binding_values.get(k) or {}
1503                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1504
1505             card_name = card['name'].split(':')[-1]
1506             if card_name == 'player':
1507                 yield {
1508                     '_type': 'url',
1509                     'url': get_binding_value('player_url'),
1510                 }
1511             elif card_name == 'periscope_broadcast':
1512                 yield {
1513                     '_type': 'url',
1514                     'url': get_binding_value('url') or get_binding_value('player_url'),
1515                     'ie_key': PeriscopeIE.ie_key(),
1516                 }
1517             elif card_name == 'broadcast':
1518                 yield {
1519                     '_type': 'url',
1520                     'url': get_binding_value('broadcast_url'),
1521                     'ie_key': TwitterBroadcastIE.ie_key(),
1522                 }
1523             elif card_name == 'audiospace':
1524                 yield {
1525                     '_type': 'url',
1526                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1527                     'ie_key': TwitterSpacesIE.ie_key(),
1528                 }
1529             elif card_name == 'summary':
1530                 yield {
1531                     '_type': 'url',
1532                     'url': get_binding_value('card_url'),
1533                 }
1534             elif card_name == 'unified_card':
1535                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1536                 yield from map(extract_from_video_info, traverse_obj(
1537                     unified_card, ('media_entities', ...), expected_type=dict))
1538             # amplify, promo_video_website, promo_video_convo, appplayer,
1539             # video_direct_message, poll2choice_video, poll3choice_video,
1540             # poll4choice_video, ...
1541             else:
1542                 is_amplify = card_name == 'amplify'
1543                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1544                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1545                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1546
1547                 thumbnails = []
1548                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1549                     image = get_binding_value('player_image' + suffix) or {}
1550                     image_url = image.get('url')
1551                     if not image_url or '/player-placeholder' in image_url:
1552                         continue
1553                     thumbnails.append({
1554                         'id': suffix[1:] if suffix else 'medium',
1555                         'url': image_url,
1556                         'width': int_or_none(image.get('width')),
1557                         'height': int_or_none(image.get('height')),
1558                     })
1559
1560                 yield {
1561                     'formats': formats,
1562                     'subtitles': subtitles,
1563                     'thumbnails': thumbnails,
1564                     'duration': int_or_none(get_binding_value(
1565                         'content_duration_seconds')),
1566                 }
1567
1568         videos = traverse_obj(status, (
1569             (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
1570
1571         if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1572             selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1573         else:
1574             desired_obj = traverse_obj(status, (
1575                 (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
1576             if not desired_obj:
1577                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1578             elif desired_obj.get('type') != 'video':
1579                 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1580
1581             # Restore original archive id and video index in title
1582             for index, entry in enumerate(videos, 1):
1583                 if entry.get('id') != desired_obj.get('id'):
1584                     continue
1585                 if index == 1:
1586                     info['_old_archive_ids'] = [make_archive_id(self, twid)]
1587                 if len(videos) != 1:
1588                     info['title'] += f' #{index}'
1589                 break
1590
1591             return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1592
1593         entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
1594         if not entries:
1595             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1596             if not expanded_url or expanded_url == url:
1597                 self.raise_no_formats('No video could be found in this tweet', expected=True)
1598                 return info
1599
1600             return self.url_result(expanded_url, display_id=twid, **info)
1601
1602         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1603
1604         if len(entries) == 1:
1605             return entries[0]
1606
1607         for index, entry in enumerate(entries, 1):
1608             entry['title'] += f' #{index}'
1609
1610         return self.playlist_result(entries, **info)
1611
1612
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for video pages served from amp.twimg.com/v/<36-character id>
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's ``twitter:*`` meta tags.

        Returns an info dict with the fixed title 'Twitter Video'. The
        dimensions advertised for the player are attached to the first
        format, if any format was found.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the (width, height) pair advertised for `target` in the meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing it unconditionally would
        # raise IndexError instead of letting the missing formats be reported normally
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1668
1669
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for broadcast pages (i/broadcasts/<13-character id>)
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a broadcast: metadata first, then its m3u8 formats.

        Upcoming broadcasts are returned without formats.
        Raises ExtractorError when the API holds no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast itself win over the parsed defaults
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The `type` query parameter of the playlist URL serves as the format ID
        playlist_query = urllib.parse.parse_qs(urllib.parse.urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1745
1746
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for Twitter Spaces (i/spaces/<13-character id>)
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased space state -> live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL request payload for the space ``space_id``."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Extract metadata and, where available, audio formats of a space.

        Login is required. Spaces that have not started, have replay disabled
        or are not downloadable yet are reported through raise_no_formats.
        Raises ExtractorError when the API returns no data for the space.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        space = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space:
            raise ExtractorError('Twitter Space not found', expected=True)

        meta = space['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[meta['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not is_live and not meta.get('is_space_available_for_replay'):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif meta.get('media_key'):
            media_key = meta['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            # Every format is marked as audio-only AAC; replays also get a container hint
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': meta.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(meta, (*creator_path, 'name')),
            'uploader_id': traverse_obj(meta, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(meta['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(meta.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1885
1886
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links, and `tco:<code>` pseudo-URLs, to the URL they lead to
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Request the short link and hand the resulting URL on for extraction.

        When the response URL is Twitter's unsafe-link warning page, the
        warning prefix is removed so that the wrapped link is returned.
        """
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # `tco:<code>` form: rebuild the real t.co URL from the short code
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url

        # A plain local name is used on purpose: a double-underscore name inside a
        # class body is subject to private name mangling
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the verified leading prefix; str.replace would also remove
            # any later occurrence of the same text inside the wrapped link.
            # NOTE(review): the remainder is passed on as-is - confirm whether it
            # needs percent-decoding
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)