yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5 from .periscope import PeriscopeBaseIE, PeriscopeIE
   6 from ..compat import (
   7     compat_parse_qs,
   8     compat_urllib_parse_unquote,
   9     compat_urllib_parse_urlparse,
  10 )
  11 from ..utils import (
  12     ExtractorError,
  13     dict_get,
  14     float_or_none,
  15     format_field,
  16     int_or_none,
  17     make_archive_id,
  18     remove_end,
  19     str_or_none,
  20     strip_or_none,
  21     traverse_obj,
  22     try_call,
  23     try_get,
  24     unified_timestamp,
  25     update_url_query,
  26     url_or_none,
  27     xpath_text,
  28 )
  29
  30
class TwitterBaseIE(InfoExtractor):
    # Shared base for the Twitter extractors in this file: holds the API endpoints,
    # the bearer token, the login flow and the guest-token / API call helpers.

    # Machine name for stored credentials (presumably looked up in .netrc by the base class - confirm)
    _NETRC_MACHINE = 'twitter'
    # Prefix for the guest-token, login and non-GraphQL API requests
    _API_BASE = 'https://api.twitter.com/1.1/'
    # Prefix used by _call_api() when graphql=True
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # URL prefix pattern: http(s), optional "www."/"m."/"mobile." subdomain, then twitter.com
    # or the .onion host, then "/". Extractors append their own path pattern to build _VALID_URL
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Authorization header that _set_base_headers() copies into every request's headers
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Set by _fetch_guest_token(); reset to None by _call_api() on a "bad guest token" error
    _guest_token = None
    # Updated by _call_login_api() from each login response and sent back with the next login step
    _flow_token = None

    # Compact JSON (bytes) passed as the request data of the first login call
    # (the one made with flow_name=login in _perform_login()).
    # NOTE(review): 'subtask_versions' presumably advertises the subtask versions
    # this client claims to support - confirm against the API before editing values
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()
  93
  94     def _extract_variant_formats(self, variant, video_id):
  95         variant_url = variant.get('url')
  96         if not variant_url:
  97             return [], {}
  98         elif '.m3u8' in variant_url:
  99             return self._extract_m3u8_formats_and_subtitles(
 100                 variant_url, video_id, 'mp4', 'm3u8_native',
 101                 m3u8_id='hls', fatal=False)
 102         else:
 103             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
 104             f = {
 105                 'url': variant_url,
 106                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
 107                 'tbr': tbr,
 108             }
 109             self._search_dimensions_in_video_url(f, variant_url)
 110             return [f], {}
 111
 112     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
 113         vmap_url = url_or_none(vmap_url)
 114         if not vmap_url:
 115             return [], {}
 116         vmap_data = self._download_xml(vmap_url, video_id)
 117         formats = []
 118         subtitles = {}
 119         urls = []
 120         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
 121             video_variant.attrib['url'] = compat_urllib_parse_unquote(
 122                 video_variant.attrib['url'])
 123             urls.append(video_variant.attrib['url'])
 124             fmts, subs = self._extract_variant_formats(
 125                 video_variant.attrib, video_id)
 126             formats.extend(fmts)
 127             subtitles = self._merge_subtitles(subtitles, subs)
 128         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
 129         if video_url not in urls:
 130             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
 131             formats.extend(fmts)
 132             subtitles = self._merge_subtitles(subtitles, subs)
 133         return formats, subtitles
 134
 135     @staticmethod
 136     def _search_dimensions_in_video_url(a_format, video_url):
 137         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
 138         if m:
 139             a_format.update({
 140                 'width': int(m.group('width')),
 141                 'height': int(m.group('height')),
 142             })
 143
 144     @property
 145     def is_logged_in(self):
 146         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
 147
 148     def _fetch_guest_token(self, headers, display_id):
 149         headers.pop('x-guest-token', None)
 150         self._guest_token = traverse_obj(self._download_json(
 151             f'{self._API_BASE}guest/activate.json', display_id,
 152             'Downloading guest token', data=b'', headers=headers), 'guest_token')
 153         if not self._guest_token:
 154             raise ExtractorError('Could not retrieve guest token')
 155
 156     def _set_base_headers(self):
 157         headers = self._AUTH.copy()
 158         csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
 159         if csrf_token:
 160             headers['x-csrf-token'] = csrf_token
 161         return headers
 162
 163     def _call_login_api(self, note, headers, query={}, data=None):
 164         response = self._download_json(
 165             f'{self._API_BASE}onboarding/task.json', None, note,
 166             headers=headers, query=query, data=data, expected_status=400)
 167         error = traverse_obj(response, ('errors', 0, 'message', {str}))
 168         if error:
 169             raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
 170         elif traverse_obj(response, 'status') != 'success':
 171             raise ExtractorError('Login was unsuccessful')
 172
 173         subtask = traverse_obj(
 174             response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
 175         if not subtask:
 176             raise ExtractorError('Twitter API did not return next login subtask')
 177
 178         self._flow_token = response['flow_token']
 179
 180         return subtask
 181
 182     def _perform_login(self, username, password):
 183         if self.is_logged_in:
 184             return
 185
 186         self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
 187         headers = self._set_base_headers()
 188         self._fetch_guest_token(headers, None)
 189         headers.update({
 190             'content-type': 'application/json',
 191             'x-guest-token': self._guest_token,
 192             'x-twitter-client-language': 'en',
 193             'x-twitter-active-user': 'yes',
 194             'Referer': 'https://twitter.com/',
 195             'Origin': 'https://twitter.com',
 196         })
 197
 198         def build_login_json(*subtask_inputs):
 199             return json.dumps({
 200                 'flow_token': self._flow_token,
 201                 'subtask_inputs': subtask_inputs
 202             }, separators=(',', ':')).encode()
 203
 204         def input_dict(subtask_id, text):
 205             return {
 206                 'subtask_id': subtask_id,
 207                 'enter_text': {
 208                     'text': text,
 209                     'link': 'next_link'
 210                 }
 211             }
 212
 213         next_subtask = self._call_login_api(
 214             'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
 215
 216         while not self.is_logged_in:
 217             if next_subtask == 'LoginJsInstrumentationSubtask':
 218                 next_subtask = self._call_login_api(
 219                     'Submitting JS instrumentation response', headers, data=build_login_json({
 220                         'subtask_id': next_subtask,
 221                         'js_instrumentation': {
 222                             'response': '{}',
 223                             'link': 'next_link'
 224                         }
 225                     }))
 226
 227             elif next_subtask == 'LoginEnterUserIdentifierSSO':
 228                 next_subtask = self._call_login_api(
 229                     'Submitting username', headers, data=build_login_json({
 230                         'subtask_id': next_subtask,
 231                         'settings_list': {
 232                             'setting_responses': [{
 233                                 'key': 'user_identifier',
 234                                 'response_data': {
 235                                     'text_data': {
 236                                         'result': username
 237                                     }
 238                                 }
 239                             }],
 240                             'link': 'next_link'
 241                         }
 242                     }))
 243
 244             elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
 245                 next_subtask = self._call_login_api(
 246                     'Submitting alternate identifier', headers,
 247                     data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
 248                         'one of username, phone number or email that was not used as --username'))))
 249
 250             elif next_subtask == 'LoginEnterPassword':
 251                 next_subtask = self._call_login_api(
 252                     'Submitting password', headers, data=build_login_json({
 253                         'subtask_id': next_subtask,
 254                         'enter_password': {
 255                             'password': password,
 256                             'link': 'next_link'
 257                         }
 258                     }))
 259
 260             elif next_subtask == 'AccountDuplicationCheck':
 261                 next_subtask = self._call_login_api(
 262                     'Submitting account duplication check', headers, data=build_login_json({
 263                         'subtask_id': next_subtask,
 264                         'check_logged_in_account': {
 265                             'link': 'AccountDuplicationCheck_false'
 266                         }
 267                     }))
 268
 269             elif next_subtask == 'LoginTwoFactorAuthChallenge':
 270                 next_subtask = self._call_login_api(
 271                     'Submitting 2FA token', headers, data=build_login_json(input_dict(
 272                         next_subtask, self._get_tfa_info('two-factor authentication token'))))
 273
 274             elif next_subtask == 'LoginAcid':
 275                 next_subtask = self._call_login_api(
 276                     'Submitting confirmation code', headers, data=build_login_json(input_dict(
 277                         next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
 278
 279             elif next_subtask == 'LoginSuccessSubtask':
 280                 raise ExtractorError('Twitter API did not grant auth token cookie')
 281
 282             else:
 283                 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
 284
 285         self.report_login()
 286
 287     def _call_api(self, path, video_id, query={}, graphql=False):
 288         headers = self._set_base_headers()
 289         if self.is_logged_in:
 290             headers.update({
 291                 'x-twitter-auth-type': 'OAuth2Session',
 292                 'x-twitter-client-language': 'en',
 293                 'x-twitter-active-user': 'yes',
 294             })
 295
 296         for first_attempt in (True, False):
 297             if not self.is_logged_in:
 298                 if not self._guest_token:
 299                     self._fetch_guest_token(headers, video_id)
 300                 headers['x-guest-token'] = self._guest_token
 301
 302             allowed_status = {400, 401, 403, 404} if graphql else {403}
 303             result = self._download_json(
 304                 (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 305                 video_id, headers=headers, query=query, expected_status=allowed_status,
 306                 note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 307
 308             if result.get('errors'):
 309                 errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 310                 if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
 311                     self.to_screen('Guest token has expired. Refreshing guest token')
 312                     self._guest_token = None
 313                     continue
 314
 315                 raise ExtractorError(
 316                     f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
 317
 318             return result
 319
 320     def _build_graphql_query(self, media_id):
 321         raise NotImplementedError('Method must be implemented to support GraphQL')
 322
 323     def _call_graphql_api(self, endpoint, media_id):
 324         data = self._build_graphql_query(media_id)
 325         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 326         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 327
 328
class TwitterCardIE(InfoExtractor):
    # Handles card / embedded-video URLs. It extracts nothing itself: _real_extract()
    # hands the numeric ID from the URL over to TwitterIE as a status URL.
    IE_NAME = 'twitter:card'
    # Accepts /i/cards/tfw/v1/<id>, /i/videos/<id> and /i/videos/tweet/<id>
    # on any host matched by TwitterBaseIE._BASE_REGEX
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    # Sample URLs with the metadata expected for them
    # (presumably consumed by the project's test harness - confirm)
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]
 447
 448     def _real_extract(self, url):
 449         status_id = self._match_id(url)
 450         return self.url_result(
 451             'https://twitter.com/statuses/' + status_id,
 452             TwitterIE.ie_key(), status_id)
 453
 454
 455 class TwitterIE(TwitterBaseIE):
 456     IE_NAME = 'twitter'
 457     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 458
 459     _TESTS = [{
 460         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 461         'info_dict': {
 462             'id': '643211870443208704',
 463             'display_id': '643211948184596480',
 464             'ext': 'mp4',
 465             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 466             'thumbnail': r're:^https?://.*\.jpg',
 467             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 468             'uploader': 'FREE THE NIPPLE',
 469             'uploader_id': 'freethenipple',
 470             'duration': 12.922,
 471             'timestamp': 1442188653,
 472             'upload_date': '20150913',
 473             'uploader_url': 'https://twitter.com/freethenipple',
 474             'comment_count': int,
 475             'repost_count': int,
 476             'like_count': int,
 477             'view_count': int,
 478             'tags': [],
 479             'age_limit': 18,
 480         },
 481     }, {
 482         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 483         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 484         'info_dict': {
 485             'id': '657991469417025536',
 486             'ext': 'mp4',
 487             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 488             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 489             'thumbnail': r're:^https?://.*\.png',
 490             'uploader': 'Gifs',
 491             'uploader_id': 'giphz',
 492         },
 493         'expected_warnings': ['height', 'width'],
 494         'skip': 'Account suspended',
 495     }, {
 496         'url': 'https://twitter.com/starwars/status/665052190608723968',
 497         'info_dict': {
 498             'id': '665052190608723968',
 499             'display_id': '665052190608723968',
 500             'ext': 'mp4',
 501             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 502             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 503             'uploader_id': 'starwars',
 504             'uploader': r're:Star Wars.*',
 505             'timestamp': 1447395772,
 506             'upload_date': '20151113',
 507             'uploader_url': 'https://twitter.com/starwars',
 508             'comment_count': int,
 509             'repost_count': int,
 510             'like_count': int,
 511             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 512             'age_limit': 0,
 513         },
 514     }, {
 515         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 516         'info_dict': {
 517             'id': '705235433198714880',
 518             'ext': 'mp4',
 519             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 520             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 521             'uploader_id': 'BTNBrentYarina',
 522             'uploader': 'Brent Yarina',
 523             'timestamp': 1456976204,
 524             'upload_date': '20160303',
 525             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 526             'comment_count': int,
 527             'repost_count': int,
 528             'like_count': int,
 529             'tags': [],
 530             'age_limit': 0,
 531         },
 532         'params': {
 533             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 534             # Test case of TwitterCardIE
 535             'skip_download': True,
 536         },
 537         'skip': 'Dead external link',
 538     }, {
 539         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 540         'info_dict': {
 541             'id': '700207414000242688',
 542             'display_id': '700207533655363584',
 543             'ext': 'mp4',
 544             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 545             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 546             'thumbnail': r're:^https?://.*\.jpg',
 547             'uploader': 'jaydin donte geer',
 548             'uploader_id': 'jaydingeer',
 549             'duration': 30.0,
 550             'timestamp': 1455777459,
 551             'upload_date': '20160218',
 552             'uploader_url': 'https://twitter.com/jaydingeer',
 553             'comment_count': int,
 554             'repost_count': int,
 555             'like_count': int,
 556             'view_count': int,
 557             'tags': ['Damndaniel'],
 558             'age_limit': 0,
 559         },
 560     }, {
 561         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 562         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 563         'info_dict': {
 564             'id': 'MIOxnrUteUd',
 565             'ext': 'mp4',
 566             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 567             'uploader': 'TAKUMA',
 568             'uploader_id': '1004126642786242560',
 569             'timestamp': 1402826626,
 570             'upload_date': '20140615',
 571             'thumbnail': r're:^https?://.*\.jpg',
 572             'alt_title': 'Vine by TAKUMA',
 573             'comment_count': int,
 574             'repost_count': int,
 575             'like_count': int,
 576             'view_count': int,
 577         },
 578         'add_ie': ['Vine'],
 579     }, {
 580         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 581         'info_dict': {
 582             'id': '717462543795523584',
 583             'display_id': '719944021058060289',
 584             'ext': 'mp4',
 585             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 586             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 587             'uploader_id': 'CaptainAmerica',
 588             'uploader': 'Captain America',
 589             'duration': 3.17,
 590             'timestamp': 1460483005,
 591             'upload_date': '20160412',
 592             'uploader_url': 'https://twitter.com/CaptainAmerica',
 593             'thumbnail': r're:^https?://.*\.jpg',
 594             'comment_count': int,
 595             'repost_count': int,
 596             'like_count': int,
 597             'view_count': int,
 598             'tags': [],
 599             'age_limit': 0,
 600         },
 601     }, {
 602         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 603         'info_dict': {
 604             'id': '1zqKVVlkqLaKB',
 605             'ext': 'mp4',
 606             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 607             'upload_date': '20160923',
 608             'uploader_id': '1PmKqpJdOJQoY',
 609             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 610             'timestamp': 1474613214,
 611             'thumbnail': r're:^https?://.*\.jpg',
 612         },
 613         'add_ie': ['Periscope'],
 614     }, {
 615         # has mp4 formats via mobile API
 616         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 617         'info_dict': {
 618             'id': '852138619213144067',
 619             'ext': 'mp4',
 620             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 621             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 622             'uploader': 'عالم الأخبار',
 623             'uploader_id': 'news_al3alm',
 624             'duration': 277.4,
 625             'timestamp': 1492000653,
 626             'upload_date': '20170412',
 627         },
 628         'skip': 'Account suspended',
 629     }, {
 630         'url': 'https://twitter.com/i/web/status/910031516746514432',
 631         'info_dict': {
 632             'id': '910030238373089285',
 633             'display_id': '910031516746514432',
 634             'ext': 'mp4',
 635             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 636             'thumbnail': r're:^https?://.*\.jpg',
 637             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 638             'uploader': 'Préfet de Guadeloupe',
 639             'uploader_id': 'Prefet971',
 640             'duration': 47.48,
 641             'timestamp': 1505803395,
 642             'upload_date': '20170919',
 643             'uploader_url': 'https://twitter.com/Prefet971',
 644             'comment_count': int,
 645             'repost_count': int,
 646             'like_count': int,
 647             'view_count': int,
 648             'tags': ['Maria'],
 649             'age_limit': 0,
 650         },
 651         'params': {
 652             'skip_download': True,  # requires ffmpeg
 653         },
 654     }, {
 655         # card via api.twitter.com/1.1/videos/tweet/config
 656         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 657         'info_dict': {
 658             'id': '1001551417340022785',
 659             'display_id': '1001551623938805763',
 660             'ext': 'mp4',
 661             'title': 're:.*?Shep is on a roll today.*?',
 662             'thumbnail': r're:^https?://.*\.jpg',
 663             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 664             'uploader': 'Lis Power',
 665             'uploader_id': 'LisPower1',
 666             'duration': 111.278,
 667             'timestamp': 1527623489,
 668             'upload_date': '20180529',
 669             'uploader_url': 'https://twitter.com/LisPower1',
 670             'comment_count': int,
 671             'repost_count': int,
 672             'like_count': int,
 673             'view_count': int,
 674             'tags': [],
 675             'age_limit': 0,
 676         },
 677         'params': {
 678             'skip_download': True,  # requires ffmpeg
 679         },
 680     }, {
 681         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 682         'info_dict': {
 683             'id': '1087791272830607360',
 684             'display_id': '1087791357756956680',
 685             'ext': 'mp4',
 686             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 687             'thumbnail': r're:^https?://.*\.jpg',
 688             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 689             'uploader': 'Twitter',
 690             'uploader_id': 'Twitter',
 691             'duration': 61.567,
 692             'timestamp': 1548184644,
 693             'upload_date': '20190122',
 694             'uploader_url': 'https://twitter.com/Twitter',
 695             'comment_count': int,
 696             'repost_count': int,
 697             'like_count': int,
 698             'view_count': int,
 699             'tags': [],
 700             'age_limit': 0,
 701         },
 702     }, {
 703         # not available in Periscope
 704         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 705         'info_dict': {
 706             'id': '1vOGwqejwoWxB',
 707             'ext': 'mp4',
 708             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 709             'uploader': 'Vivi',
 710             'uploader_id': '1eVjYOLGkGrQL',
 711             'thumbnail': r're:^https?://.*\.jpg',
 712             'tags': ['EduTECH2019'],
 713             'view_count': int,
 714         },
 715         'add_ie': ['TwitterBroadcast'],
 716     }, {
 717         # unified card
 718         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 719         'info_dict': {
 720             'id': '1349774757969989634',
 721             'display_id': '1349794411333394432',
 722             'ext': 'mp4',
 723             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 724             'thumbnail': r're:^https?://.*\.jpg',
 725             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 726             'uploader': 'Brooklyn Nets',
 727             'uploader_id': 'BrooklynNets',
 728             'duration': 324.484,
 729             'timestamp': 1610651040,
 730             'upload_date': '20210114',
 731             'uploader_url': 'https://twitter.com/BrooklynNets',
 732             'comment_count': int,
 733             'repost_count': int,
 734             'like_count': int,
 735             'tags': [],
 736             'age_limit': 0,
 737         },
 738         'params': {
 739             'skip_download': True,
 740         },
 741     }, {
 742         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 743         'info_dict': {
 744             'id': '1577855447914409984',
 745             'display_id': '1577855540407197696',
 746             'ext': 'mp4',
 747             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 748             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 749             'upload_date': '20221006',
 750             'uploader': 'oshtru',
 751             'uploader_id': 'oshtru',
 752             'uploader_url': 'https://twitter.com/oshtru',
 753             'thumbnail': r're:^https?://.*\.jpg',
 754             'duration': 30.03,
 755             'timestamp': 1665025050,
 756             'comment_count': int,
 757             'repost_count': int,
 758             'like_count': int,
 759             'view_count': int,
 760             'tags': [],
 761             'age_limit': 0,
 762         },
 763         'params': {'skip_download': True},
 764     }, {
 765         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 766         'info_dict': {
 767             'id': '1577719286659006464',
 768             'title': 'Ultima | #\u0432\u029f\u043c - Test',
 769             'description': 'Test https://t.co/Y3KEZD7Dad',
 770             'uploader': 'Ultima | #\u0432\u029f\u043c',
 771             'uploader_id': 'UltimaShadowX',
 772             'uploader_url': 'https://twitter.com/UltimaShadowX',
 773             'upload_date': '20221005',
 774             'timestamp': 1664992565,
 775             'comment_count': int,
 776             'repost_count': int,
 777             'like_count': int,
 778             'tags': [],
 779             'age_limit': 0,
 780         },
 781         'playlist_count': 4,
 782         'params': {'skip_download': True},
 783     }, {
 784         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 785         'info_dict': {
 786             'id': '1575559336759263233',
 787             'display_id': '1575560063510810624',
 788             'ext': 'mp4',
 789             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 790             'thumbnail': r're:^https?://.*\.jpg',
 791             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 792             'uploader': 'Max Olson',
 793             'uploader_id': 'MesoMax919',
 794             'uploader_url': 'https://twitter.com/MesoMax919',
 795             'duration': 21.321,
 796             'timestamp': 1664477766,
 797             'upload_date': '20220929',
 798             'comment_count': int,
 799             'repost_count': int,
 800             'like_count': int,
 801             'view_count': int,
 802             'tags': ['HurricaneIan'],
 803             'age_limit': 0,
 804         },
 805     }, {
 806         # Adult content, fails if not logged in (GraphQL)
 807         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 808         'info_dict': {
 809             'id': '1575199163847000068',
 810             'display_id': '1575199173472927762',
 811             'ext': 'mp4',
 812             'title': str,
 813             'description': str,
 814             'uploader': str,
 815             'uploader_id': 'Rizdraws',
 816             'uploader_url': 'https://twitter.com/Rizdraws',
 817             'upload_date': '20220928',
 818             'timestamp': 1664391723,
 819             'thumbnail': r're:^https?://.+\.jpg',
 820             'like_count': int,
 821             'repost_count': int,
 822             'comment_count': int,
 823             'age_limit': 18,
 824             'tags': []
 825         },
 826         'skip': 'Requires authentication',
 827     }, {
 828         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 829         'playlist_mincount': 2,
 830         'info_dict': {
 831             'id': '1395079556562706435',
 832             'title': str,
 833             'tags': [],
 834             'uploader': str,
 835             'like_count': int,
 836             'upload_date': '20210519',
 837             'age_limit': 0,
 838             'repost_count': int,
 839             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 840             'uploader_id': 'Srirachachau',
 841             'comment_count': int,
 842             'uploader_url': 'https://twitter.com/Srirachachau',
 843             'timestamp': 1621447860,
 844         },
 845     }, {
 846         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 847         'playlist_mincount': 2,
 848         'info_dict': {
 849             'id': '1578353380363501568',
 850             'title': str,
 851             'uploader_id': 'DavidToons_',
 852             'repost_count': int,
 853             'like_count': int,
 854             'uploader': str,
 855             'timestamp': 1665143744,
 856             'uploader_url': 'https://twitter.com/DavidToons_',
 857             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 858             'tags': [],
 859             'comment_count': int,
 860             'upload_date': '20221007',
 861             'age_limit': 0,
 862         },
 863     }, {
 864         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 865         'playlist_count': 2,
 866         'info_dict': {
 867             'id': '1578401165338976258',
 868             'title': str,
 869             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 870             'uploader': str,
 871             'uploader_id': 'primevideouk',
 872             'timestamp': 1665155137,
 873             'upload_date': '20221007',
 874             'age_limit': 0,
 875             'uploader_url': 'https://twitter.com/primevideouk',
 876             'comment_count': int,
 877             'repost_count': int,
 878             'like_count': int,
 879             'tags': ['TheRingsOfPower'],
 880         },
 881     }, {
 882         # Twitter Spaces
 883         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 884         'info_dict': {
 885             'id': '1lPJqmBeeNAJb',
 886             'ext': 'm4a',
 887             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 888             'uploader': r're:Monique Camarra.+?',
 889             'uploader_id': 'MoniqueCamarra',
 890             'live_status': 'was_live',
 891             'release_timestamp': 1658417414,
 892             'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
 893             'timestamp': 1658407771,
 894             'release_date': '20220721',
 895             'upload_date': '20220721',
 896         },
 897         'add_ie': ['TwitterSpaces'],
 898         'params': {'skip_download': 'm3u8'},
 899     }, {
 900         # URL specifies video number but --yes-playlist
 901         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 902         'playlist_mincount': 2,
 903         'info_dict': {
 904             'id': '1600649710662213632',
 905             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 906             'timestamp': 1670459604.0,
 907             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 908             'comment_count': int,
 909             'uploader_id': 'CTVJLaidlaw',
 910             'repost_count': int,
 911             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 912             'upload_date': '20221208',
 913             'age_limit': 0,
 914             'uploader': 'Jocelyn Laidlaw',
 915             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 916             'like_count': int,
 917         },
 918     }, {
 919         # URL specifies video number and --no-playlist
 920         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 921         'info_dict': {
 922             'id': '1600649511827013632',
 923             'ext': 'mp4',
 924             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 925             'thumbnail': r're:^https?://.+\.jpg',
 926             'timestamp': 1670459604.0,
 927             'uploader_id': 'CTVJLaidlaw',
 928             'uploader': 'Jocelyn Laidlaw',
 929             'repost_count': int,
 930             'comment_count': int,
 931             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 932             'duration': 102.226,
 933             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 934             'display_id': '1600649710662213632',
 935             'like_count': int,
 936             'view_count': int,
 937             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 938             'upload_date': '20221208',
 939             'age_limit': 0,
 940         },
 941         'params': {'noplaylist': True},
 942     }, {
 943         # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
 944         # note the id different between extraction and url
 945         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 946         'info_dict': {
 947             'id': '1621117577354424321',
 948             'display_id': '1621117700482416640',
 949             'ext': 'mp4',
 950             'title': '뽀 - 아 최우제 이동속도 봐',
 951             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 952             'duration': 24.598,
 953             'uploader': '뽀',
 954             'uploader_id': 's2FAKER',
 955             'uploader_url': 'https://twitter.com/s2FAKER',
 956             'upload_date': '20230202',
 957             'timestamp': 1675339553.0,
 958             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
 959             'age_limit': 18,
 960             'tags': [],
 961             'like_count': int,
 962             'repost_count': int,
 963             'comment_count': int,
 964             'view_count': int,
 965         },
 966     }, {
 967         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
 968         'info_dict': {
 969             'id': '1599108643743473680',
 970             'display_id': '1599108751385972737',
 971             'ext': 'mp4',
 972             'title': '\u06ea - \U0001F48B',
 973             'uploader_url': 'https://twitter.com/hlo_again',
 974             'like_count': int,
 975             'uploader_id': 'hlo_again',
 976             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
 977             'repost_count': int,
 978             'duration': 9.531,
 979             'comment_count': int,
 980             'view_count': int,
 981             'upload_date': '20221203',
 982             'age_limit': 0,
 983             'timestamp': 1670092210.0,
 984             'tags': [],
 985             'uploader': '\u06ea',
 986             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
 987         },
 988         'params': {'noplaylist': True},
 989     }, {
 990         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
 991         'info_dict': {
 992             'id': '1600009362759733248',
 993             'display_id': '1600009574919962625',
 994             'ext': 'mp4',
 995             'uploader_url': 'https://twitter.com/MunTheShinobi',
 996             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
 997             'view_count': int,
 998             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
 999             'age_limit': 0,
1000             'uploader': 'Mün The Shinobi',
1001             'repost_count': int,
1002             'upload_date': '20221206',
1003             'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1004             'comment_count': int,
1005             'like_count': int,
1006             'tags': [],
1007             'uploader_id': 'MunTheShinobi',
1008             'duration': 139.987,
1009             'timestamp': 1670306984.0,
1010         },
1011     }, {
1012         # url to retweet id, legacy API
1013         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1014         'info_dict': {
1015             'id': '1623274794488659969',
1016             'display_id': '1623739803874349067',
1017             'ext': 'mp4',
1018             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
1019             'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
1020             'uploader': 'Johnny Bullets',
1021             'uploader_id': 'Johnnybull3ts',
1022             'uploader_url': 'https://twitter.com/Johnnybull3ts',
1023             'age_limit': 0,
1024             'tags': [],
1025             'duration': 8.033,
1026             'timestamp': 1675853859.0,
1027             'upload_date': '20230208',
1028             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1029             'like_count': int,
1030             'repost_count': int,
1031             'comment_count': int,
1032         },
1033         'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
1034     }, {
1035         # onion route
1036         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1037         'only_matching': True,
1038     }, {
1039         # Twitch Clip Embed
1040         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1041         'only_matching': True,
1042     }, {
1043         # promo_video_website card
1044         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1045         'only_matching': True,
1046     }, {
1047         # promo_video_convo card
1048         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1049         'only_matching': True,
1050     }, {
1051         # appplayer card
1052         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1053         'only_matching': True,
1054     }, {
1055         # video_direct_message card
1056         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1057         'only_matching': True,
1058     }, {
1059         # poll2choice_video card
1060         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1061         'only_matching': True,
1062     }, {
1063         # poll3choice_video card
1064         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1065         'only_matching': True,
1066     }, {
1067         # poll4choice_video card
1068         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1069         'only_matching': True,
1070     }]
1071
1072     def _graphql_to_legacy(self, data, twid):
1073         result = traverse_obj(data, (
1074             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1075             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
1076             'tweet_results', 'result', ('tweet', None),
1077         ), expected_type=dict, default={}, get_all=False)
1078
1079         if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
1080             self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
1081
1082         if 'tombstone' in result:
1083             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
1084             if cause and 'adult content' in cause:
1085                 self.raise_login_required(cause)
1086             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
1087
1088         status = result.get('legacy', {})
1089         status.update(traverse_obj(result, {
1090             'user': ('core', 'user_results', 'result', 'legacy'),
1091             'card': ('card', 'legacy'),
1092             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1093         }, expected_type=dict, default={}))
1094
1095         # extra transformation is needed since result does not match legacy format
1096         binding_values = {
1097             binding_value.get('key'): binding_value.get('value')
1098             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
1099         }
1100         if binding_values:
1101             status['card']['binding_values'] = binding_values
1102
1103         return status
1104
1105     def _build_graphql_query(self, media_id):
1106         return {
1107             'variables': {
1108                 'focalTweetId': media_id,
1109                 'includePromotedContent': True,
1110                 'with_rux_injections': False,
1111                 'withBirdwatchNotes': True,
1112                 'withCommunity': True,
1113                 'withDownvotePerspective': False,
1114                 'withQuickPromoteEligibilityTweetFields': True,
1115                 'withReactionsMetadata': False,
1116                 'withReactionsPerspective': False,
1117                 'withSuperFollowsTweetFields': True,
1118                 'withSuperFollowsUserFields': True,
1119                 'withV2Timeline': True,
1120                 'withVoice': True,
1121             },
1122             'features': {
1123                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1124                 'interactive_text_enabled': True,
1125                 'responsive_web_edit_tweet_api_enabled': True,
1126                 'responsive_web_enhance_cards_enabled': True,
1127                 'responsive_web_graphql_timeline_navigation_enabled': False,
1128                 'responsive_web_text_conversations_enabled': False,
1129                 'responsive_web_uc_gql_enabled': True,
1130                 'standardized_nudges_misinfo': True,
1131                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1132                 'tweetypie_unmention_optimization_enabled': True,
1133                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1134                 'verified_phone_label_enabled': False,
1135                 'vibe_api_enabled': True,
1136             },
1137         }
1138
1139     def _real_extract(self, url):
1140         twid, selected_index = self._match_valid_url(url).group('id', 'index')
1141         if self._configuration_arg('legacy_api') and not self.is_logged_in:
1142             status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
1143                 'cards_platform': 'Web-12',
1144                 'include_cards': 1,
1145                 'include_reply_count': 1,
1146                 'include_user_entities': 0,
1147                 'tweet_mode': 'extended',
1148             }), 'retweeted_status', None)
1149         else:
1150             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
1151             status = self._graphql_to_legacy(result, twid)
1152
1153         title = description = status['full_text'].replace('\n', ' ')
1154         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
1155         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
1156         user = status.get('user') or {}
1157         uploader = user.get('name')
1158         if uploader:
1159             title = f'{uploader} - {title}'
1160         uploader_id = user.get('screen_name')
1161
1162         info = {
1163             'id': twid,
1164             'title': title,
1165             'description': description,
1166             'uploader': uploader,
1167             'timestamp': unified_timestamp(status.get('created_at')),
1168             'uploader_id': uploader_id,
1169             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
1170             'like_count': int_or_none(status.get('favorite_count')),
1171             'repost_count': int_or_none(status.get('retweet_count')),
1172             'comment_count': int_or_none(status.get('reply_count')),
1173             'age_limit': 18 if status.get('possibly_sensitive') else 0,
1174             'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
1175         }
1176
1177         def extract_from_video_info(media):
1178             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
1179             self.write_debug(f'Extracting from video info: {media_id}')
1180             video_info = media.get('video_info') or {}
1181
1182             formats = []
1183             subtitles = {}
1184             for variant in video_info.get('variants', []):
1185                 fmts, subs = self._extract_variant_formats(variant, twid)
1186                 subtitles = self._merge_subtitles(subtitles, subs)
1187                 formats.extend(fmts)
1188
1189             thumbnails = []
1190             media_url = media.get('media_url_https') or media.get('media_url')
1191             if media_url:
1192                 def add_thumbnail(name, size):
1193                     thumbnails.append({
1194                         'id': name,
1195                         'url': update_url_query(media_url, {'name': name}),
1196                         'width': int_or_none(size.get('w') or size.get('width')),
1197                         'height': int_or_none(size.get('h') or size.get('height')),
1198                     })
1199                 for name, size in media.get('sizes', {}).items():
1200                     add_thumbnail(name, size)
1201                 add_thumbnail('orig', media.get('original_info') or {})
1202
1203             return {
1204                 'id': media_id,
1205                 'formats': formats,
1206                 'subtitles': subtitles,
1207                 'thumbnails': thumbnails,
1208                 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
1209                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
1210                 # The codec of http formats are unknown
1211                 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
1212             }
1213
1214         def extract_from_card_info(card):
1215             if not card:
1216                 return
1217
1218             self.write_debug(f'Extracting from card info: {card.get("url")}')
1219             binding_values = card['binding_values']
1220
1221             def get_binding_value(k):
1222                 o = binding_values.get(k) or {}
1223                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1224
1225             card_name = card['name'].split(':')[-1]
1226             if card_name == 'player':
1227                 yield {
1228                     '_type': 'url',
1229                     'url': get_binding_value('player_url'),
1230                 }
1231             elif card_name == 'periscope_broadcast':
1232                 yield {
1233                     '_type': 'url',
1234                     'url': get_binding_value('url') or get_binding_value('player_url'),
1235                     'ie_key': PeriscopeIE.ie_key(),
1236                 }
1237             elif card_name == 'broadcast':
1238                 yield {
1239                     '_type': 'url',
1240                     'url': get_binding_value('broadcast_url'),
1241                     'ie_key': TwitterBroadcastIE.ie_key(),
1242                 }
1243             elif card_name == 'audiospace':
1244                 yield {
1245                     '_type': 'url',
1246                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1247                     'ie_key': TwitterSpacesIE.ie_key(),
1248                 }
1249             elif card_name == 'summary':
1250                 yield {
1251                     '_type': 'url',
1252                     'url': get_binding_value('card_url'),
1253                 }
1254             elif card_name == 'unified_card':
1255                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1256                 yield from map(extract_from_video_info, traverse_obj(
1257                     unified_card, ('media_entities', ...), expected_type=dict))
1258             # amplify, promo_video_website, promo_video_convo, appplayer,
1259             # video_direct_message, poll2choice_video, poll3choice_video,
1260             # poll4choice_video, ...
1261             else:
1262                 is_amplify = card_name == 'amplify'
1263                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1264                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1265                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1266
1267                 thumbnails = []
1268                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1269                     image = get_binding_value('player_image' + suffix) or {}
1270                     image_url = image.get('url')
1271                     if not image_url or '/player-placeholder' in image_url:
1272                         continue
1273                     thumbnails.append({
1274                         'id': suffix[1:] if suffix else 'medium',
1275                         'url': image_url,
1276                         'width': int_or_none(image.get('width')),
1277                         'height': int_or_none(image.get('height')),
1278                     })
1279
1280                 yield {
1281                     'formats': formats,
1282                     'subtitles': subtitles,
1283                     'thumbnails': thumbnails,
1284                     'duration': int_or_none(get_binding_value(
1285                         'content_duration_seconds')),
1286                 }
1287
1288         videos = traverse_obj(status, (
1289             (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
1290
1291         if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1292             selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1293         else:
1294             desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
1295             if not desired_obj:
1296                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1297             elif desired_obj.get('type') != 'video':
1298                 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1299
1300             # Restore original archive id and video index in title
1301             for index, entry in enumerate(videos, 1):
1302                 if entry.get('id') != desired_obj.get('id'):
1303                     continue
1304                 if index == 1:
1305                     info['_old_archive_ids'] = [make_archive_id(self, twid)]
1306                 if len(videos) != 1:
1307                     info['title'] += f' #{index}'
1308                 break
1309
1310             return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1311
1312         entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
1313         if not entries:
1314             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1315             if not expanded_url or expanded_url == url:
1316                 self.raise_no_formats('No video could be found in this tweet', expected=True)
1317                 return info
1318
1319             return self.url_result(expanded_url, display_id=twid, **info)
1320
1321         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1322
1323         if len(entries) == 1:
1324             return entries[0]
1325
1326         for index, entry in enumerate(entries, 1):
1327             entry['title'] += f' #{index}'
1328
1329         return self.playlist_result(entries, **info)
1330
1331
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Twitter Amplify pages of the form ``amp.twimg.com/v/<id>``."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the Amplify page and build the info dict from its ``twitter:*`` meta tags.

        Formats come from the VMAP URL in the ``twitter:amplify:vmap`` meta tag; the
        thumbnail and the width/height values are read from the other meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Returns (width, height) from the 'twitter:<target>:width/height' meta tags;
            # either value is None when its tag is missing or not an integer
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # The player dimensions are attached to the first format only. Guard against an
        # empty format list so that a page without usable formats does not fail here
        # with an IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1387
1388
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> URLs."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)

        # Broadcast metadata, keyed by broadcast id in the API response
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        # The stream status endpoint is addressed by media key, not broadcast id
        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's `type` query parameter (if any) names the format
        playlist_query = compat_parse_qs(
            compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1425
1426
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for twitter.com/i/spaces/<id> audio spaces."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased space `state` to the `live_status` value reported
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` payload for the space lookup."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Fetch the space's metadata and, when playable, its audio formats."""
        space_id = self._match_id(url)
        api_result = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = api_result['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Missing or unrecognised states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not is_live and not metadata.get('is_space_available_for_replay'):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(
                f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate source fields
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}),
                get_all=False)
            if source:
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', live=is_live, fatal=False,
                    headers={'Referer': 'https://twitter.com/'})

            # Spaces are audio-only; replays additionally get the m4a_dash container
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1529
1530
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve t.co short links (or `tco:<code>` pseudo-URLs) to their target."""

    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host `t.co` matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever URL it resolves to."""
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # `tco:<code>` form: rebuild the real short URL from the code
            shortcode = eid
            url = self._BASE_URL + shortcode

        # NOTE(review): the curl User-Agent presumably makes t.co answer with
        # a plain HTTP redirect -- confirm before changing it.
        new_url = self._request_webpage(
            url, shortcode, headers={'User-Agent': 'curl'}).geturl()

        # Links flagged as unsafe resolve to a warning page carrying the real
        # target after this prefix; strip only the leading prefix so a target
        # URL that happens to contain the same text is left intact.
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)