yt_dlp/extractor/twitter.py

   1 import json
   2 import random
   3 import re
   4
   5 from .common import InfoExtractor
   6 from .periscope import PeriscopeBaseIE, PeriscopeIE
   7 from ..compat import functools  # isort: split
   8 from ..compat import (
   9     compat_parse_qs,
  10     compat_urllib_parse_unquote,
  11     compat_urllib_parse_urlparse,
  12 )
  13 from ..networking.exceptions import HTTPError
  14 from ..utils import (
  15     ExtractorError,
  16     dict_get,
  17     filter_dict,
  18     float_or_none,
  19     format_field,
  20     int_or_none,
  21     make_archive_id,
  22     remove_end,
  23     str_or_none,
  24     strip_or_none,
  25     traverse_obj,
  26     try_call,
  27     try_get,
  28     unified_timestamp,
  29     update_url_query,
  30     url_or_none,
  31     xpath_text,
  32 )
  33
  34
class TwitterBaseIE(InfoExtractor):
    """Base class holding the API endpoints, bearer tokens, login flow and
    API-call helpers defined below."""

    _NETRC_MACHINE = 'twitter'
    # Base URL for guest activation, the login flow (onboarding/task.json) and
    # every `_call_api` request made with graphql=False
    _API_BASE = 'https://api.twitter.com/1.1/'
    # Base URL used by `_call_api` when graphql=True
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # URL prefix shared by the `_VALID_URL` patterns: optional www./m./mobile.
    # subdomain followed by twitter.com or the .onion host
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header; `_set_base_headers` picks
    # _LEGACY_AUTH only when legacy mode is requested and no login cookie exists
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by `_call_login_api` from each login response and echoed back in
    # the next login request
    _flow_token = None

    # Compact JSON request body (bytes) posted by `_perform_login` to start the
    # 'login' flow.
    # NOTE(review): 'subtask_versions' presumably advertises the subtask
    # versions this client understands - confirm against the API.
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()
  97
  98     def _extract_variant_formats(self, variant, video_id):
  99         variant_url = variant.get('url')
 100         if not variant_url:
 101             return [], {}
 102         elif '.m3u8' in variant_url:
 103             fmts, subs = self._extract_m3u8_formats_and_subtitles(
 104                 variant_url, video_id, 'mp4', 'm3u8_native',
 105                 m3u8_id='hls', fatal=False)
 106             for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
 107                 if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
 108                     f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
 109             return fmts, subs
 110         else:
 111             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
 112             f = {
 113                 'url': variant_url,
 114                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
 115                 'tbr': tbr,
 116             }
 117             self._search_dimensions_in_video_url(f, variant_url)
 118             return [f], {}
 119
 120     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
 121         vmap_url = url_or_none(vmap_url)
 122         if not vmap_url:
 123             return [], {}
 124         vmap_data = self._download_xml(vmap_url, video_id)
 125         formats = []
 126         subtitles = {}
 127         urls = []
 128         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
 129             video_variant.attrib['url'] = compat_urllib_parse_unquote(
 130                 video_variant.attrib['url'])
 131             urls.append(video_variant.attrib['url'])
 132             fmts, subs = self._extract_variant_formats(
 133                 video_variant.attrib, video_id)
 134             formats.extend(fmts)
 135             subtitles = self._merge_subtitles(subtitles, subs)
 136         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
 137         if video_url not in urls:
 138             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
 139             formats.extend(fmts)
 140             subtitles = self._merge_subtitles(subtitles, subs)
 141         return formats, subtitles
 142
 143     @staticmethod
 144     def _search_dimensions_in_video_url(a_format, video_url):
 145         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
 146         if m:
 147             a_format.update({
 148                 'width': int(m.group('width')),
 149                 'height': int(m.group('height')),
 150             })
 151
 152     @property
 153     def is_logged_in(self):
 154         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
 155
 156     @functools.cached_property
 157     def _selected_api(self):
 158         return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
 159
 160     def _fetch_guest_token(self, display_id):
 161         guest_token = traverse_obj(self._download_json(
 162             f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
 163             headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
 164             ('guest_token', {str}))
 165         if not guest_token:
 166             raise ExtractorError('Could not retrieve guest token')
 167         return guest_token
 168
 169     def _set_base_headers(self, legacy=False):
 170         bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
 171         return filter_dict({
 172             'Authorization': f'Bearer {bearer_token}',
 173             'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
 174         })
 175
 176     def _call_login_api(self, note, headers, query={}, data=None):
 177         response = self._download_json(
 178             f'{self._API_BASE}onboarding/task.json', None, note,
 179             headers=headers, query=query, data=data, expected_status=400)
 180         error = traverse_obj(response, ('errors', 0, 'message', {str}))
 181         if error:
 182             raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
 183         elif traverse_obj(response, 'status') != 'success':
 184             raise ExtractorError('Login was unsuccessful')
 185
 186         subtask = traverse_obj(
 187             response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
 188         if not subtask:
 189             raise ExtractorError('Twitter API did not return next login subtask')
 190
 191         self._flow_token = response['flow_token']
 192
 193         return subtask
 194
 195     def _perform_login(self, username, password):
 196         if self.is_logged_in:
 197             return
 198
 199         webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
 200         guest_token = self._search_regex(
 201             r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
 202         headers = {
 203             **self._set_base_headers(),
 204             'content-type': 'application/json',
 205             'x-guest-token': guest_token,
 206             'x-twitter-client-language': 'en',
 207             'x-twitter-active-user': 'yes',
 208             'Referer': 'https://twitter.com/',
 209             'Origin': 'https://twitter.com',
 210         }
 211
 212         def build_login_json(*subtask_inputs):
 213             return json.dumps({
 214                 'flow_token': self._flow_token,
 215                 'subtask_inputs': subtask_inputs
 216             }, separators=(',', ':')).encode()
 217
 218         def input_dict(subtask_id, text):
 219             return {
 220                 'subtask_id': subtask_id,
 221                 'enter_text': {
 222                     'text': text,
 223                     'link': 'next_link'
 224                 }
 225             }
 226
 227         next_subtask = self._call_login_api(
 228             'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
 229
 230         while not self.is_logged_in:
 231             if next_subtask == 'LoginJsInstrumentationSubtask':
 232                 next_subtask = self._call_login_api(
 233                     'Submitting JS instrumentation response', headers, data=build_login_json({
 234                         'subtask_id': next_subtask,
 235                         'js_instrumentation': {
 236                             'response': '{}',
 237                             'link': 'next_link'
 238                         }
 239                     }))
 240
 241             elif next_subtask == 'LoginEnterUserIdentifierSSO':
 242                 next_subtask = self._call_login_api(
 243                     'Submitting username', headers, data=build_login_json({
 244                         'subtask_id': next_subtask,
 245                         'settings_list': {
 246                             'setting_responses': [{
 247                                 'key': 'user_identifier',
 248                                 'response_data': {
 249                                     'text_data': {
 250                                         'result': username
 251                                     }
 252                                 }
 253                             }],
 254                             'link': 'next_link'
 255                         }
 256                     }))
 257
 258             elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
 259                 next_subtask = self._call_login_api(
 260                     'Submitting alternate identifier', headers,
 261                     data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
 262                         'one of username, phone number or email that was not used as --username'))))
 263
 264             elif next_subtask == 'LoginEnterPassword':
 265                 next_subtask = self._call_login_api(
 266                     'Submitting password', headers, data=build_login_json({
 267                         'subtask_id': next_subtask,
 268                         'enter_password': {
 269                             'password': password,
 270                             'link': 'next_link'
 271                         }
 272                     }))
 273
 274             elif next_subtask == 'AccountDuplicationCheck':
 275                 next_subtask = self._call_login_api(
 276                     'Submitting account duplication check', headers, data=build_login_json({
 277                         'subtask_id': next_subtask,
 278                         'check_logged_in_account': {
 279                             'link': 'AccountDuplicationCheck_false'
 280                         }
 281                     }))
 282
 283             elif next_subtask == 'LoginTwoFactorAuthChallenge':
 284                 next_subtask = self._call_login_api(
 285                     'Submitting 2FA token', headers, data=build_login_json(input_dict(
 286                         next_subtask, self._get_tfa_info('two-factor authentication token'))))
 287
 288             elif next_subtask == 'LoginAcid':
 289                 next_subtask = self._call_login_api(
 290                     'Submitting confirmation code', headers, data=build_login_json(input_dict(
 291                         next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
 292
 293             elif next_subtask == 'ArkoseLogin':
 294                 self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
 295
 296             elif next_subtask == 'DenyLoginSubtask':
 297                 self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
 298
 299             elif next_subtask == 'LoginSuccessSubtask':
 300                 raise ExtractorError('Twitter API did not grant auth token cookie')
 301
 302             else:
 303                 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
 304
 305         self.report_login()
 306
 307     def _call_api(self, path, video_id, query={}, graphql=False):
 308         headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
 309         headers.update({
 310             'x-twitter-auth-type': 'OAuth2Session',
 311             'x-twitter-client-language': 'en',
 312             'x-twitter-active-user': 'yes',
 313         } if self.is_logged_in else {
 314             'x-guest-token': self._fetch_guest_token(video_id)
 315         })
 316         allowed_status = {400, 401, 403, 404} if graphql else {403}
 317         result = self._download_json(
 318             (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 319             video_id, headers=headers, query=query, expected_status=allowed_status,
 320             note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 321
 322         if result.get('errors'):
 323             errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 324             if errors and 'not authorized' in errors:
 325                 self.raise_login_required(remove_end(errors, '.'))
 326             raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')
 327
 328         return result
 329
 330     def _build_graphql_query(self, media_id):
 331         raise NotImplementedError('Method must be implemented to support GraphQL')
 332
 333     def _call_graphql_api(self, endpoint, media_id):
 334         data = self._build_graphql_query(media_id)
 335         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 336         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 337
 338
class TwitterCardIE(InfoExtractor):
    """Extractor for card and embedded-video URLs.

    `_real_extract` forwards the matched status ID to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    # Matches i/cards/tfw/v1/<id>, i/videos/<id> and i/videos/tweet/<id>
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    # Sample URLs with the metadata expected from them
    # NOTE(review): presumably consumed by the project's test harness - confirm
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            # Card embedding a YouTube video; handled by the Youtube extractor
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            # Card embedding a Vine; handled by the Vine extractor
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            # URL-matching check only
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]
 457
 458     def _real_extract(self, url):
 459         status_id = self._match_id(url)
 460         return self.url_result(
 461             'https://twitter.com/statuses/' + status_id,
 462             TwitterIE.ie_key(), status_id)
 463
 464
 465 class TwitterIE(TwitterBaseIE):
 466     IE_NAME = 'twitter'
 467     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 468
 469     _TESTS = [{
 470         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 471         'info_dict': {
 472             'id': '643211870443208704',
 473             'display_id': '643211948184596480',
 474             'ext': 'mp4',
 475             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 476             'thumbnail': r're:^https?://.*\.jpg',
 477             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 478             'channel_id': '549749560',
 479             'uploader': 'FREE THE NIPPLE',
 480             'uploader_id': 'freethenipple',
 481             'duration': 12.922,
 482             'timestamp': 1442188653,
 483             'upload_date': '20150913',
 484             'uploader_url': 'https://twitter.com/freethenipple',
 485             'comment_count': int,
 486             'repost_count': int,
 487             'like_count': int,
 488             'tags': [],
 489             'age_limit': 18,
 490             '_old_archive_ids': ['twitter 643211948184596480'],
 491         },
 492         'skip': 'Requires authentication',
 493     }, {
 494         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 495         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 496         'info_dict': {
 497             'id': '657991469417025536',
 498             'ext': 'mp4',
 499             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 500             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 501             'thumbnail': r're:^https?://.*\.png',
 502             'uploader': 'Gifs',
 503             'uploader_id': 'giphz',
 504         },
 505         'expected_warnings': ['height', 'width'],
 506         'skip': 'Account suspended',
 507     }, {
 508         'url': 'https://twitter.com/starwars/status/665052190608723968',
 509         'info_dict': {
 510             'id': '665052190608723968',
 511             'display_id': '665052190608723968',
 512             'ext': 'mp4',
 513             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 514             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 515             'channel_id': '20106852',
 516             'uploader_id': 'starwars',
 517             'uploader': r're:Star Wars.*',
 518             'timestamp': 1447395772,
 519             'upload_date': '20151113',
 520             'uploader_url': 'https://twitter.com/starwars',
 521             'comment_count': int,
 522             'repost_count': int,
 523             'like_count': int,
 524             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 525             'age_limit': 0,
 526             '_old_archive_ids': ['twitter 665052190608723968'],
 527         },
 528     }, {
 529         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 530         'info_dict': {
 531             'id': '705235433198714880',
 532             'ext': 'mp4',
 533             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 534             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 535             'uploader_id': 'BTNBrentYarina',
 536             'uploader': 'Brent Yarina',
 537             'timestamp': 1456976204,
 538             'upload_date': '20160303',
 539             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 540             'comment_count': int,
 541             'repost_count': int,
 542             'like_count': int,
 543             'tags': [],
 544             'age_limit': 0,
 545         },
 546         'params': {
 547             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 548             # Test case of TwitterCardIE
 549             'skip_download': True,
 550         },
 551         'skip': 'Dead external link',
 552     }, {
 553         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 554         'info_dict': {
 555             'id': '700207414000242688',
 556             'display_id': '700207533655363584',
 557             'ext': 'mp4',
 558             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 559             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 560             'thumbnail': r're:^https?://.*\.jpg',
 561             'channel_id': '1383165541',
 562             'uploader': 'jaydin donte geer',
 563             'uploader_id': 'jaydingeer',
 564             'duration': 30.0,
 565             'timestamp': 1455777459,
 566             'upload_date': '20160218',
 567             'uploader_url': 'https://twitter.com/jaydingeer',
 568             'comment_count': int,
 569             'repost_count': int,
 570             'like_count': int,
 571             'tags': ['Damndaniel'],
 572             'age_limit': 0,
 573             '_old_archive_ids': ['twitter 700207533655363584'],
 574         },
 575     }, {
 576         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 577         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 578         'info_dict': {
 579             'id': 'MIOxnrUteUd',
 580             'ext': 'mp4',
 581             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 582             'uploader': 'TAKUMA',
 583             'uploader_id': '1004126642786242560',
 584             'timestamp': 1402826626,
 585             'upload_date': '20140615',
 586             'thumbnail': r're:^https?://.*\.jpg',
 587             'alt_title': 'Vine by TAKUMA',
 588             'comment_count': int,
 589             'repost_count': int,
 590             'like_count': int,
 591             'view_count': int,
 592         },
 593         'add_ie': ['Vine'],
 594     }, {
 595         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 596         'info_dict': {
 597             'id': '717462543795523584',
 598             'display_id': '719944021058060289',
 599             'ext': 'mp4',
 600             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 601             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 602             'channel_id': '701615052',
 603             'uploader_id': 'CaptainAmerica',
 604             'uploader': 'Captain America',
 605             'duration': 3.17,
 606             'timestamp': 1460483005,
 607             'upload_date': '20160412',
 608             'uploader_url': 'https://twitter.com/CaptainAmerica',
 609             'thumbnail': r're:^https?://.*\.jpg',
 610             'comment_count': int,
 611             'repost_count': int,
 612             'like_count': int,
 613             'tags': [],
 614             'age_limit': 0,
 615             '_old_archive_ids': ['twitter 719944021058060289'],
 616         },
 617     }, {
 618         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 619         'info_dict': {
 620             'id': '1zqKVVlkqLaKB',
 621             'ext': 'mp4',
 622             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 623             'upload_date': '20160923',
 624             'uploader_id': '1PmKqpJdOJQoY',
 625             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 626             'timestamp': 1474613214,
 627             'thumbnail': r're:^https?://.*\.jpg',
 628         },
 629         'add_ie': ['Periscope'],
 630         'skip': 'Broadcast not found',
 631     }, {
 632         # has mp4 formats via mobile API
 633         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 634         'info_dict': {
 635             'id': '852077943283097602',
 636             'ext': 'mp4',
 637             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 638             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 639             'channel_id': '2526757026',
 640             'uploader': 'عالم الأخبار',
 641             'uploader_id': 'news_al3alm',
 642             'duration': 277.4,
 643             'timestamp': 1492000653,
 644             'upload_date': '20170412',
 645             'display_id': '852138619213144067',
 646             'age_limit': 0,
 647             'uploader_url': 'https://twitter.com/news_al3alm',
 648             'thumbnail': r're:^https?://.*\.jpg',
 649             'tags': [],
 650             'repost_count': int,
 651             'like_count': int,
 652             'comment_count': int,
 653             '_old_archive_ids': ['twitter 852138619213144067'],
 654         },
 655     }, {
 656         'url': 'https://twitter.com/i/web/status/910031516746514432',
 657         'info_dict': {
 658             'id': '910030238373089285',
 659             'display_id': '910031516746514432',
 660             'ext': 'mp4',
 661             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 662             'thumbnail': r're:^https?://.*\.jpg',
 663             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 664             'channel_id': '2319432498',
 665             'uploader': 'Préfet de Guadeloupe',
 666             'uploader_id': 'Prefet971',
 667             'duration': 47.48,
 668             'timestamp': 1505803395,
 669             'upload_date': '20170919',
 670             'uploader_url': 'https://twitter.com/Prefet971',
 671             'comment_count': int,
 672             'repost_count': int,
 673             'like_count': int,
 674             'tags': ['Maria'],
 675             'age_limit': 0,
 676             '_old_archive_ids': ['twitter 910031516746514432'],
 677         },
 678         'params': {
 679             'skip_download': True,  # requires ffmpeg
 680         },
 681     }, {
 682         # card via api.twitter.com/1.1/videos/tweet/config
 683         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 684         'info_dict': {
 685             'id': '1001551417340022785',
 686             'display_id': '1001551623938805763',
 687             'ext': 'mp4',
 688             'title': 're:.*?Shep is on a roll today.*?',
 689             'thumbnail': r're:^https?://.*\.jpg',
 690             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 691             'channel_id': '255036353',
 692             'uploader': 'Lis Power',
 693             'uploader_id': 'LisPower1',
 694             'duration': 111.278,
 695             'timestamp': 1527623489,
 696             'upload_date': '20180529',
 697             'uploader_url': 'https://twitter.com/LisPower1',
 698             'comment_count': int,
 699             'repost_count': int,
 700             'like_count': int,
 701             'tags': [],
 702             'age_limit': 0,
 703             '_old_archive_ids': ['twitter 1001551623938805763'],
 704         },
 705         'params': {
 706             'skip_download': True,  # requires ffmpeg
 707         },
 708     }, {
 709         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 710         'info_dict': {
 711             'id': '1087791272830607360',
 712             'display_id': '1087791357756956680',
 713             'ext': 'mp4',
 714             'title': 'X - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 715             'thumbnail': r're:^https?://.*\.jpg',
 716             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 717             'uploader': 'X',
 718             'uploader_id': 'X',
 719             'duration': 61.567,
 720             'timestamp': 1548184644,
 721             'upload_date': '20190122',
 722             'uploader_url': 'https://twitter.com/X',
 723             'comment_count': int,
 724             'repost_count': int,
 725             'like_count': int,
 726             'view_count': int,
 727             'tags': [],
 728             'age_limit': 0,
 729         },
 730         'skip': 'This Tweet is unavailable',
 731     }, {
 732         # not available in Periscope
 733         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 734         'info_dict': {
 735             'id': '1vOGwqejwoWxB',
 736             'ext': 'mp4',
 737             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 738             'uploader': 'Vivi',
 739             'uploader_id': '1eVjYOLGkGrQL',
 740             'thumbnail': r're:^https?://.*\.jpg',
 741             'tags': ['EduTECH2019'],
 742             'view_count': int,
 743         },
 744         'add_ie': ['TwitterBroadcast'],
 745         'skip': 'Broadcast no longer exists',
 746     }, {
 747         # unified card
 748         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 749         'info_dict': {
 750             'id': '1349774757969989634',
 751             'display_id': '1349794411333394432',
 752             'ext': 'mp4',
 753             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 754             'thumbnail': r're:^https?://.*\.jpg',
 755             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 756             'channel_id': '18552281',
 757             'uploader': 'Brooklyn Nets',
 758             'uploader_id': 'BrooklynNets',
 759             'duration': 324.484,
 760             'timestamp': 1610651040,
 761             'upload_date': '20210114',
 762             'uploader_url': 'https://twitter.com/BrooklynNets',
 763             'comment_count': int,
 764             'repost_count': int,
 765             'like_count': int,
 766             'tags': [],
 767             'age_limit': 0,
 768             '_old_archive_ids': ['twitter 1349794411333394432'],
 769         },
 770         'params': {
 771             'skip_download': True,
 772         },
 773     }, {
 774         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 775         'info_dict': {
 776             'id': '1577855447914409984',
 777             'display_id': '1577855540407197696',
 778             'ext': 'mp4',
 779             'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
 780             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 781             'upload_date': '20221006',
 782             'channel_id': '143077138',
 783             'uploader': 'Oshtru',
 784             'uploader_id': 'oshtru',
 785             'uploader_url': 'https://twitter.com/oshtru',
 786             'thumbnail': r're:^https?://.*\.jpg',
 787             'duration': 30.03,
 788             'timestamp': 1665025050,
 789             'comment_count': int,
 790             'repost_count': int,
 791             'like_count': int,
 792             'tags': [],
 793             'age_limit': 0,
 794             '_old_archive_ids': ['twitter 1577855540407197696'],
 795         },
 796         'params': {'skip_download': True},
 797     }, {
 798         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 799         'info_dict': {
 800             'id': '1577719286659006464',
 801             'title': 'Ultima Reload - Test',
 802             'description': 'Test https://t.co/Y3KEZD7Dad',
 803             'channel_id': '168922496',
 804             'uploader': 'Ultima Reload',
 805             'uploader_id': 'UltimaShadowX',
 806             'uploader_url': 'https://twitter.com/UltimaShadowX',
 807             'upload_date': '20221005',
 808             'timestamp': 1664992565,
 809             'comment_count': int,
 810             'repost_count': int,
 811             'like_count': int,
 812             'tags': [],
 813             'age_limit': 0,
 814         },
 815         'playlist_count': 4,
 816         'params': {'skip_download': True},
 817     }, {
 818         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 819         'info_dict': {
 820             'id': '1575559336759263233',
 821             'display_id': '1575560063510810624',
 822             'ext': 'mp4',
 823             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 824             'thumbnail': r're:^https?://.*\.jpg',
 825             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 826             'channel_id': '1094109584',
 827             'uploader': 'Max Olson',
 828             'uploader_id': 'MesoMax919',
 829             'uploader_url': 'https://twitter.com/MesoMax919',
 830             'duration': 21.321,
 831             'timestamp': 1664477766,
 832             'upload_date': '20220929',
 833             'comment_count': int,
 834             'repost_count': int,
 835             'like_count': int,
 836             'tags': ['HurricaneIan'],
 837             'age_limit': 0,
 838             '_old_archive_ids': ['twitter 1575560063510810624'],
 839         },
 840     }, {
 841         # Adult content, fails if not logged in
 842         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 843         'info_dict': {
 844             'id': '1575199163847000068',
 845             'display_id': '1575199173472927762',
 846             'ext': 'mp4',
 847             'title': str,
 848             'description': str,
 849             'channel_id': '1217167793541480450',
 850             'uploader': str,
 851             'uploader_id': 'Rizdraws',
 852             'uploader_url': 'https://twitter.com/Rizdraws',
 853             'upload_date': '20220928',
 854             'timestamp': 1664391723,
 855             'thumbnail': r're:^https?://.+\.jpg',
 856             'like_count': int,
 857             'repost_count': int,
 858             'comment_count': int,
 859             'age_limit': 18,
 860             'tags': [],
 861             '_old_archive_ids': ['twitter 1575199173472927762'],
 862         },
 863         'params': {'skip_download': 'The media could not be played'},
 864         'skip': 'Requires authentication',
 865     }, {
 866         # Playlist result only with graphql API
 867         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 868         'playlist_mincount': 2,
 869         'info_dict': {
 870             'id': '1395079556562706435',
 871             'title': str,
 872             'tags': [],
 873             'channel_id': '21539378',
 874             'uploader': str,
 875             'like_count': int,
 876             'upload_date': '20210519',
 877             'age_limit': 0,
 878             'repost_count': int,
 879             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 880             'uploader_id': 'Srirachachau',
 881             'comment_count': int,
 882             'uploader_url': 'https://twitter.com/Srirachachau',
 883             'timestamp': 1621447860,
 884         },
 885     }, {
 886         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 887         'playlist_mincount': 2,
 888         'info_dict': {
 889             'id': '1578353380363501568',
 890             'title': str,
 891             'channel_id': '2195866214',
 892             'uploader_id': 'DavidToons_',
 893             'repost_count': int,
 894             'like_count': int,
 895             'uploader': str,
 896             'timestamp': 1665143744,
 897             'uploader_url': 'https://twitter.com/DavidToons_',
 898             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 899             'tags': [],
 900             'comment_count': int,
 901             'upload_date': '20221007',
 902             'age_limit': 0,
 903         },
 904     }, {
 905         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 906         'playlist_count': 2,
 907         'info_dict': {
 908             'id': '1578401165338976258',
 909             'title': str,
 910             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 911             'channel_id': '19338359',
 912             'uploader': str,
 913             'uploader_id': 'primevideouk',
 914             'timestamp': 1665155137,
 915             'upload_date': '20221007',
 916             'age_limit': 0,
 917             'uploader_url': 'https://twitter.com/primevideouk',
 918             'comment_count': int,
 919             'repost_count': int,
 920             'like_count': int,
 921             'tags': ['TheRingsOfPower'],
 922         },
 923     }, {
 924         # Twitter Spaces
 925         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 926         'info_dict': {
 927             'id': '1lPJqmBeeNAJb',
 928             'ext': 'm4a',
 929             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 930             'uploader': r're:Monique Camarra.+?',
 931             'uploader_id': 'MoniqueCamarra',
 932             'live_status': 'was_live',
 933             'release_timestamp': 1658417414,
 934             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 935             'timestamp': 1658407771,
 936             'release_date': '20220721',
 937             'upload_date': '20220721',
 938         },
 939         'add_ie': ['TwitterSpaces'],
 940         'params': {'skip_download': 'm3u8'},
 941         'skip': 'Requires authentication',
 942     }, {
 943         # URL specifies video number but --yes-playlist
 944         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 945         'playlist_mincount': 2,
 946         'info_dict': {
 947             'id': '1600649710662213632',
 948             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 949             'timestamp': 1670459604.0,
 950             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 951             'comment_count': int,
 952             'uploader_id': 'CTVJLaidlaw',
 953             'channel_id': '80082014',
 954             'repost_count': int,
 955             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 956             'upload_date': '20221208',
 957             'age_limit': 0,
 958             'uploader': 'Jocelyn Laidlaw',
 959             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 960             'like_count': int,
 961         },
 962     }, {
 963         # URL specifies video number and --no-playlist
 964         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 965         'info_dict': {
 966             'id': '1600649511827013632',
 967             'ext': 'mp4',
 968             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 969             'thumbnail': r're:^https?://.+\.jpg',
 970             'timestamp': 1670459604.0,
 971             'channel_id': '80082014',
 972             'uploader_id': 'CTVJLaidlaw',
 973             'uploader': 'Jocelyn Laidlaw',
 974             'repost_count': int,
 975             'comment_count': int,
 976             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 977             'duration': 102.226,
 978             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 979             'display_id': '1600649710662213632',
 980             'like_count': int,
 981             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 982             'upload_date': '20221208',
 983             'age_limit': 0,
 984             '_old_archive_ids': ['twitter 1600649710662213632'],
 985         },
 986         'params': {'noplaylist': True},
 987     }, {
 988         # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
 989         # note the id different between extraction and url
 990         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 991         'info_dict': {
 992             'id': '1621117577354424321',
 993             'display_id': '1621117700482416640',
 994             'ext': 'mp4',
 995             'title': '뽀 - 아 최우제 이동속도 봐',
 996             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 997             'duration': 24.598,
 998             'channel_id': '1281839411068432384',
 999             'uploader': '뽀',
1000             'uploader_id': 's2FAKER',
1001             'uploader_url': 'https://twitter.com/s2FAKER',
1002             'upload_date': '20230202',
1003             'timestamp': 1675339553.0,
1004             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1005             'age_limit': 18,
1006             'tags': [],
1007             'like_count': int,
1008             'repost_count': int,
1009             'comment_count': int,
1010             '_old_archive_ids': ['twitter 1621117700482416640'],
1011         },
1012         'skip': 'Requires authentication',
1013     }, {
1014         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1015         'info_dict': {
1016             'id': '1599108643743473680',
1017             'display_id': '1599108751385972737',
1018             'ext': 'mp4',
1019             'title': '\u06ea - \U0001F48B',
1020             'channel_id': '1347791436809441283',
1021             'uploader_url': 'https://twitter.com/hlo_again',
1022             'like_count': int,
1023             'uploader_id': 'hlo_again',
1024             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1025             'repost_count': int,
1026             'duration': 9.531,
1027             'comment_count': int,
1028             'upload_date': '20221203',
1029             'age_limit': 0,
1030             'timestamp': 1670092210.0,
1031             'tags': [],
1032             'uploader': '\u06ea',
1033             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1034             '_old_archive_ids': ['twitter 1599108751385972737'],
1035         },
1036         'params': {'noplaylist': True},
1037     }, {
1038         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1039         'info_dict': {
1040             'id': '1600009362759733248',
1041             'display_id': '1600009574919962625',
1042             'ext': 'mp4',
1043             'channel_id': '211814412',
1044             'uploader_url': 'https://twitter.com/MunTheShinobi',
1045             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1046             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1047             'age_limit': 0,
1048             'uploader': 'Mün',
1049             'repost_count': int,
1050             'upload_date': '20221206',
1051             'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1052             'comment_count': int,
1053             'like_count': int,
1054             'tags': [],
1055             'uploader_id': 'MunTheShinobi',
1056             'duration': 139.987,
1057             'timestamp': 1670306984.0,
1058             '_old_archive_ids': ['twitter 1600009574919962625'],
1059         },
1060     }, {
1061         # retweeted_status (private)
1062         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1063         'info_dict': {
1064             'id': '1623274794488659969',
1065             'display_id': '1623739803874349067',
1066             'ext': 'mp4',
1067             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
1068             'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1069             'uploader': 'Johnny Bullets',
1070             'uploader_id': 'Johnnybull3ts',
1071             'uploader_url': 'https://twitter.com/Johnnybull3ts',
1072             'age_limit': 0,
1073             'tags': [],
1074             'duration': 8.033,
1075             'timestamp': 1675853859.0,
1076             'upload_date': '20230208',
1077             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1078             'like_count': int,
1079             'repost_count': int,
1080         },
1081         'skip': 'Protected tweet',
1082     }, {
1083         # retweeted_status
1084         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1085         'info_dict': {
1086             'id': '1694928337846538240',
1087             'ext': 'mp4',
1088             'display_id': '1695424220702888009',
1089             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1090             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1091             'channel_id': '15212187',
1092             'uploader': 'Benny Johnson',
1093             'uploader_id': 'bennyjohnson',
1094             'uploader_url': 'https://twitter.com/bennyjohnson',
1095             'age_limit': 0,
1096             'tags': [],
1097             'duration': 45.001,
1098             'timestamp': 1692962814.0,
1099             'upload_date': '20230825',
1100             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1101             'like_count': int,
1102             'repost_count': int,
1103             'comment_count': int,
1104             '_old_archive_ids': ['twitter 1695424220702888009'],
1105         },
1106     }, {
1107         # retweeted_status w/ legacy API
1108         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1109         'info_dict': {
1110             'id': '1694928337846538240',
1111             'ext': 'mp4',
1112             'display_id': '1695424220702888009',
1113             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1114             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1115             'channel_id': '15212187',
1116             'uploader': 'Benny Johnson',
1117             'uploader_id': 'bennyjohnson',
1118             'uploader_url': 'https://twitter.com/bennyjohnson',
1119             'age_limit': 0,
1120             'tags': [],
1121             'duration': 45.001,
1122             'timestamp': 1692962814.0,
1123             'upload_date': '20230825',
1124             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1125             'like_count': int,
1126             'repost_count': int,
1127             '_old_archive_ids': ['twitter 1695424220702888009'],
1128         },
1129         'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1130     }, {
1131         # Broadcast embedded in tweet
1132         'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1133         'info_dict': {
1134             'id': '1rmxPMjLzAXKN',
1135             'ext': 'mp4',
1136             'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1137             'uploader': 'Jessica Dobson',
1138             'uploader_id': 'JessicaDobsonWX',
1139             'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1140             'timestamp': 1701566398,
1141             'upload_date': '20231203',
1142             'live_status': 'was_live',
1143             'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1144             'concurrent_view_count': int,
1145             'view_count': int,
1146         },
1147         'add_ie': ['TwitterBroadcast'],
1148     }, {
1149         # Animated gif and quote tweet video
1150         'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1151         'playlist_mincount': 2,
1152         'info_dict': {
1153             'id': '1696256659889565950',
1154             'title': 'BAKOON - https://t.co/zom968d0a0',
1155             'description': 'https://t.co/zom968d0a0',
1156             'tags': [],
1157             'channel_id': '1263540390',
1158             'uploader': 'BAKOON',
1159             'uploader_id': 'BAKKOOONN',
1160             'uploader_url': 'https://twitter.com/BAKKOOONN',
1161             'age_limit': 18,
1162             'timestamp': 1693254077.0,
1163             'upload_date': '20230828',
1164             'like_count': int,
1165             'comment_count': int,
1166             'repost_count': int,
1167         },
1168         'skip': 'Requires authentication',
1169     }, {
1170         # "stale tweet" with typename "TweetWithVisibilityResults"
1171         'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1172         'md5': '511377ff8dfa7545307084dca4dce319',
1173         'info_dict': {
1174             'id': '1724883339285544960',
1175             'ext': 'mp4',
1176             'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1177             'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1178             'display_id': '1724884212803834154',
1179             'channel_id': '337808606',
1180             'uploader': 'Robert F. Kennedy Jr',
1181             'uploader_id': 'RobertKennedyJr',
1182             'uploader_url': 'https://twitter.com/RobertKennedyJr',
1183             'upload_date': '20231115',
1184             'timestamp': 1700079417.0,
1185             'duration': 341.048,
1186             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1187             'tags': ['Kennedy24'],
1188             'repost_count': int,
1189             'like_count': int,
1190             'comment_count': int,
1191             'age_limit': 0,
1192             '_old_archive_ids': ['twitter 1724884212803834154'],
1193         },
1194     }, {
1195         # onion route
1196         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1197         'only_matching': True,
1198     }, {
1199         # Twitch Clip Embed
1200         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1201         'only_matching': True,
1202     }, {
1203         # promo_video_website card
1204         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1205         'only_matching': True,
1206     }, {
1207         # promo_video_convo card
1208         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1209         'only_matching': True,
1210     }, {
1211         # appplayer card
1212         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1213         'only_matching': True,
1214     }, {
1215         # video_direct_message card
1216         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1217         'only_matching': True,
1218     }, {
1219         # poll2choice_video card
1220         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1221         'only_matching': True,
1222     }, {
1223         # poll3choice_video card
1224         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1225         'only_matching': True,
1226     }, {
1227         # poll4choice_video card
1228         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1229         'only_matching': True,
1230     }]
1231
    # Captures the run of digits that follows a "_video/" path segment (and is
    # terminated by "/"); applied to video variant URLs to recover a media ID
    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1233
1234     @property
1235     def _GRAPHQL_ENDPOINT(self):
1236         if self.is_logged_in:
1237             return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1238         return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1239
1240     def _graphql_to_legacy(self, data, twid):
1241         result = traverse_obj(data, (
1242             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1243             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
1244             'tweet_results', 'result', ('tweet', None), {dict},
1245         ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
1246             data, ('tweetResult', 'result', {dict}), default={})
1247
1248         typename = result.get('__typename')
1249         if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
1250             self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
1251
1252         if 'tombstone' in result:
1253             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
1254             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
1255         elif typename == 'TweetUnavailable':
1256             reason = result.get('reason')
1257             if reason == 'NsfwLoggedOut':
1258                 self.raise_login_required('NSFW tweet requires authentication')
1259             elif reason == 'Protected':
1260                 self.raise_login_required('You are not authorized to view this protected tweet')
1261             raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
1262         # Result for "stale tweet" needs additional transformation
1263         elif typename == 'TweetWithVisibilityResults':
1264             result = traverse_obj(result, ('tweet', {dict})) or {}
1265
1266         status = result.get('legacy', {})
1267         status.update(traverse_obj(result, {
1268             'user': ('core', 'user_results', 'result', 'legacy'),
1269             'card': ('card', 'legacy'),
1270             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1271             'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
1272         }, expected_type=dict, default={}))
1273
1274         # extra transformations needed since result does not match legacy format
1275         if status.get('retweeted_status'):
1276             status['retweeted_status']['user'] = traverse_obj(status, (
1277                 'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
1278
1279         binding_values = {
1280             binding_value.get('key'): binding_value.get('value')
1281             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
1282         }
1283         if binding_values:
1284             status['card']['binding_values'] = binding_values
1285
1286         return status
1287
1288     def _build_graphql_query(self, media_id):
1289         return {
1290             'variables': {
1291                 'focalTweetId': media_id,
1292                 'includePromotedContent': True,
1293                 'with_rux_injections': False,
1294                 'withBirdwatchNotes': True,
1295                 'withCommunity': True,
1296                 'withDownvotePerspective': False,
1297                 'withQuickPromoteEligibilityTweetFields': True,
1298                 'withReactionsMetadata': False,
1299                 'withReactionsPerspective': False,
1300                 'withSuperFollowsTweetFields': True,
1301                 'withSuperFollowsUserFields': True,
1302                 'withV2Timeline': True,
1303                 'withVoice': True,
1304             },
1305             'features': {
1306                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1307                 'interactive_text_enabled': True,
1308                 'responsive_web_edit_tweet_api_enabled': True,
1309                 'responsive_web_enhance_cards_enabled': True,
1310                 'responsive_web_graphql_timeline_navigation_enabled': False,
1311                 'responsive_web_text_conversations_enabled': False,
1312                 'responsive_web_uc_gql_enabled': True,
1313                 'standardized_nudges_misinfo': True,
1314                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1315                 'tweetypie_unmention_optimization_enabled': True,
1316                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1317                 'verified_phone_label_enabled': False,
1318                 'vibe_api_enabled': True,
1319             },
1320         } if self.is_logged_in else {
1321             'variables': {
1322                 'tweetId': media_id,
1323                 'withCommunity': False,
1324                 'includePromotedContent': False,
1325                 'withVoice': False,
1326             },
1327             'features': {
1328                 'creator_subscriptions_tweet_preview_api_enabled': True,
1329                 'tweetypie_unmention_optimization_enabled': True,
1330                 'responsive_web_edit_tweet_api_enabled': True,
1331                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1332                 'view_counts_everywhere_api_enabled': True,
1333                 'longform_notetweets_consumption_enabled': True,
1334                 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1335                 'tweet_awards_web_tipping_enabled': False,
1336                 'freedom_of_speech_not_reach_fetch_enabled': True,
1337                 'standardized_nudges_misinfo': True,
1338                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1339                 'longform_notetweets_rich_text_read_enabled': True,
1340                 'longform_notetweets_inline_media_enabled': True,
1341                 'responsive_web_graphql_exclude_directive_enabled': True,
1342                 'verified_phone_label_enabled': False,
1343                 'responsive_web_media_download_video_enabled': False,
1344                 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1345                 'responsive_web_graphql_timeline_navigation_enabled': True,
1346                 'responsive_web_enhance_cards_enabled': False
1347             },
1348             'fieldToggles': {
1349                 'withArticleRichContentState': False
1350             }
1351         }
1352
1353     def _call_syndication_api(self, twid):
1354         self.report_warning(
1355             'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
1356         status = self._download_json(
1357             'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
1358             headers={'User-Agent': 'Googlebot'}, query={
1359                 'id': twid,
1360                 # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
1361                 'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
1362             })
1363         if not status:
1364             raise ExtractorError('Syndication endpoint returned empty JSON response')
1365         # Transform the result so its structure matches that of legacy/graphql
1366         media = []
1367         for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
1368             detail['id_str'] = traverse_obj(detail, (
1369                 'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
1370             media.append(detail)
1371         status['extended_entities'] = {'media': media}
1372
1373         return status
1374
1375     def _extract_status(self, twid):
1376         if self._selected_api not in ('graphql', 'legacy', 'syndication'):
1377             raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
1378
1379         try:
1380             if self.is_logged_in or self._selected_api == 'graphql':
1381                 status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
1382             elif self._selected_api == 'legacy':
1383                 status = self._call_api(f'statuses/show/{twid}.json', twid, {
1384                     'cards_platform': 'Web-12',
1385                     'include_cards': 1,
1386                     'include_reply_count': 1,
1387                     'include_user_entities': 0,
1388                     'tweet_mode': 'extended',
1389                 })
1390         except ExtractorError as e:
1391             if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
1392                 raise
1393             self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
1394             status = self._call_syndication_api(twid)
1395
1396         if self._selected_api == 'syndication':
1397             status = self._call_syndication_api(twid)
1398
1399         return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1400
1401     def _real_extract(self, url):
1402         twid, selected_index = self._match_valid_url(url).group('id', 'index')
1403         status = self._extract_status(twid)
1404
1405         title = description = traverse_obj(
1406             status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
1407         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
1408         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
1409         user = status.get('user') or {}
1410         uploader = user.get('name')
1411         if uploader:
1412             title = f'{uploader} - {title}'
1413         uploader_id = user.get('screen_name')
1414
1415         info = {
1416             'id': twid,
1417             'title': title,
1418             'description': description,
1419             'uploader': uploader,
1420             'timestamp': unified_timestamp(status.get('created_at')),
1421             'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
1422             'uploader_id': uploader_id,
1423             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
1424             'like_count': int_or_none(status.get('favorite_count')),
1425             'repost_count': int_or_none(status.get('retweet_count')),
1426             'comment_count': int_or_none(status.get('reply_count')),
1427             'age_limit': 18 if status.get('possibly_sensitive') else 0,
1428             'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
1429         }
1430
1431         def extract_from_video_info(media):
1432             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
1433             self.write_debug(f'Extracting from video info: {media_id}')
1434
1435             formats = []
1436             subtitles = {}
1437             for variant in traverse_obj(media, ('video_info', 'variants', ...)):
1438                 fmts, subs = self._extract_variant_formats(variant, twid)
1439                 subtitles = self._merge_subtitles(subtitles, subs)
1440                 formats.extend(fmts)
1441
1442             thumbnails = []
1443             media_url = media.get('media_url_https') or media.get('media_url')
1444             if media_url:
1445                 def add_thumbnail(name, size):
1446                     thumbnails.append({
1447                         'id': name,
1448                         'url': update_url_query(media_url, {'name': name}),
1449                         'width': int_or_none(size.get('w') or size.get('width')),
1450                         'height': int_or_none(size.get('h') or size.get('height')),
1451                     })
1452                 for name, size in media.get('sizes', {}).items():
1453                     add_thumbnail(name, size)
1454                 add_thumbnail('orig', media.get('original_info') or {})
1455
1456             return {
1457                 'id': media_id,
1458                 'formats': formats,
1459                 'subtitles': subtitles,
1460                 'thumbnails': thumbnails,
1461                 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
1462                 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
1463                 # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
1464                 '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
1465             }
1466
1467         def extract_from_card_info(card):
1468             if not card:
1469                 return
1470
1471             self.write_debug(f'Extracting from card info: {card.get("url")}')
1472             binding_values = card['binding_values']
1473
1474             def get_binding_value(k):
1475                 o = binding_values.get(k) or {}
1476                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1477
1478             card_name = card['name'].split(':')[-1]
1479             if card_name == 'player':
1480                 yield {
1481                     '_type': 'url',
1482                     'url': get_binding_value('player_url'),
1483                 }
1484             elif card_name == 'periscope_broadcast':
1485                 yield {
1486                     '_type': 'url',
1487                     'url': get_binding_value('url') or get_binding_value('player_url'),
1488                     'ie_key': PeriscopeIE.ie_key(),
1489                 }
1490             elif card_name == 'broadcast':
1491                 yield {
1492                     '_type': 'url',
1493                     'url': get_binding_value('broadcast_url'),
1494                     'ie_key': TwitterBroadcastIE.ie_key(),
1495                 }
1496             elif card_name == 'audiospace':
1497                 yield {
1498                     '_type': 'url',
1499                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1500                     'ie_key': TwitterSpacesIE.ie_key(),
1501                 }
1502             elif card_name == 'summary':
1503                 yield {
1504                     '_type': 'url',
1505                     'url': get_binding_value('card_url'),
1506                 }
1507             elif card_name == 'unified_card':
1508                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1509                 yield from map(extract_from_video_info, traverse_obj(
1510                     unified_card, ('media_entities', ...), expected_type=dict))
1511             # amplify, promo_video_website, promo_video_convo, appplayer,
1512             # video_direct_message, poll2choice_video, poll3choice_video,
1513             # poll4choice_video, ...
1514             else:
1515                 is_amplify = card_name == 'amplify'
1516                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1517                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1518                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1519
1520                 thumbnails = []
1521                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1522                     image = get_binding_value('player_image' + suffix) or {}
1523                     image_url = image.get('url')
1524                     if not image_url or '/player-placeholder' in image_url:
1525                         continue
1526                     thumbnails.append({
1527                         'id': suffix[1:] if suffix else 'medium',
1528                         'url': image_url,
1529                         'width': int_or_none(image.get('width')),
1530                         'height': int_or_none(image.get('height')),
1531                     })
1532
1533                 yield {
1534                     'formats': formats,
1535                     'subtitles': subtitles,
1536                     'thumbnails': thumbnails,
1537                     'duration': int_or_none(get_binding_value(
1538                         'content_duration_seconds')),
1539                 }
1540
1541         videos = traverse_obj(status, (
1542             (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
1543
1544         if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1545             selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1546         else:
1547             desired_obj = traverse_obj(status, (
1548                 (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
1549             if not desired_obj:
1550                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1551             elif desired_obj.get('type') != 'video':
1552                 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1553
1554             # Restore original archive id and video index in title
1555             for index, entry in enumerate(videos, 1):
1556                 if entry.get('id') != desired_obj.get('id'):
1557                     continue
1558                 if index == 1:
1559                     info['_old_archive_ids'] = [make_archive_id(self, twid)]
1560                 if len(videos) != 1:
1561                     info['title'] += f' #{index}'
1562                 break
1563
1564             return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1565
1566         entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
1567         if not entries:
1568             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1569             if not expanded_url or expanded_url == url:
1570                 self.raise_no_formats('No video could be found in this tweet', expected=True)
1571                 return info
1572
1573             return self.url_result(expanded_url, display_id=twid, **info)
1574
1575         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1576
1577         if len(entries) == 1:
1578             return entries[0]
1579
1580         for index, entry in enumerate(entries, 1):
1581             entry['title'] += f' #{index}'
1582
1583         return self.playlist_result(entries, **info)
1584
1585
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Twitter Amplify videos served from amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's twitter:* meta tags.

        Formats come from the VMAP URL in the ``twitter:amplify:vmap`` meta
        tag. The player dimensions from the meta tags are applied to the
        first format, when there is one.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        def _find_dimension(target):
            # Read the twitter:<target>:width/height meta tags as ints (or None)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing it would raise an
        # IndexError instead of letting the missing formats be reported
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1641
1642
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> URLs."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a broadcast's metadata and, unless it is upcoming, its formats.

        Raises ExtractorError when the API returns no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast itself take precedence over the parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's "type" query parameter, if any, is used as the m3u8 id
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = url_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1718
1719
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for twitter.com/i/spaces/<id> URLs (requires login)."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased Space "state" to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the graphql query (variables and features) for ``space_id``."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Extract the metadata and audio formats of a Twitter Space.

        Raises ExtractorError when the Space cannot be found. Missing
        formats (upcoming Space, replay disabled, not downloadable yet) are
        reported through ``raise_no_formats``.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # A missing or unknown state leaves live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not is_live and not metadata.get('is_space_available_for_replay'):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            # Spaces are audio-only; replays additionally get the m4a_dash container
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1858
1859
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve t.co short links (and ``tco:<id>`` pseudo-URLs) to their target."""
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and return a URL result for its destination.

        When the redirect ends on Twitter's unsafe-link warning page, the
        warning prefix is stripped so that the wrapped target is returned.
        """
        eid, short_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            # tco:<id> form: build the real t.co URL from the id
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url

        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Remove only the leading prefix; str.replace would also strip any
            # later occurrence inside the target URL
            new_url = new_url[len(unsafe_link_prefix):]
            # NOTE(review): the target may be percent-encoded as a query value
            # (not confirmed here); decode only when the scheme separator is
            # itself encoded, so an already-plain URL is left untouched
            if new_url.lower().startswith(('http%3a', 'https%3a')):
                new_url = compat_urllib_parse_unquote(new_url)
        return self.url_result(new_url)