yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5 from .periscope import PeriscopeBaseIE, PeriscopeIE
   6 from ..compat import (
   7     compat_parse_qs,
   8     compat_urllib_parse_unquote,
   9     compat_urllib_parse_urlparse,
  10 )
  11 from ..utils import (
  12     ExtractorError,
  13     dict_get,
  14     float_or_none,
  15     format_field,
  16     int_or_none,
  17     make_archive_id,
  18     remove_end,
  19     str_or_none,
  20     strip_or_none,
  21     traverse_obj,
  22     try_call,
  23     try_get,
  24     unified_timestamp,
  25     update_url_query,
  26     url_or_none,
  27     xpath_text,
  28 )
  29
  30
  31 class TwitterBaseIE(InfoExtractor):
    # NOTE(review): presumably the machine name used for netrc credential lookup;
    # the code that reads it is not part of this class - confirm
    _NETRC_MACHINE = 'twitter'
    # Base URL of the legacy API; its cookies are also what is_logged_in and
    # _set_base_headers inspect
    _API_BASE = 'https://api.twitter.com/1.1/'
    # Base URL that _call_api prepends to the path when graphql=True
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # URL prefix accepting an optional www./m./mobile. subdomain of twitter.com
    # or the .onion host; extractor _VALID_URL patterns are built on top of it
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Authorization header that _set_base_headers copies into every header set
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Guest token stored by _fetch_guest_token; _call_api resets it to None when
    # the API reports a bad guest token
    _guest_token = None
    # Flow token taken from the latest login API response by _call_login_api
    _flow_token = None

    # Compact JSON body (encoded to bytes) that _perform_login sends with the
    # initial 'flow_name=login' request
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()
  93
  94     def _extract_variant_formats(self, variant, video_id):
  95         variant_url = variant.get('url')
  96         if not variant_url:
  97             return [], {}
  98         elif '.m3u8' in variant_url:
  99             return self._extract_m3u8_formats_and_subtitles(
 100                 variant_url, video_id, 'mp4', 'm3u8_native',
 101                 m3u8_id='hls', fatal=False)
 102         else:
 103             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
 104             f = {
 105                 'url': variant_url,
 106                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
 107                 'tbr': tbr,
 108             }
 109             self._search_dimensions_in_video_url(f, variant_url)
 110             return [f], {}
 111
 112     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
 113         vmap_url = url_or_none(vmap_url)
 114         if not vmap_url:
 115             return [], {}
 116         vmap_data = self._download_xml(vmap_url, video_id)
 117         formats = []
 118         subtitles = {}
 119         urls = []
 120         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
 121             video_variant.attrib['url'] = compat_urllib_parse_unquote(
 122                 video_variant.attrib['url'])
 123             urls.append(video_variant.attrib['url'])
 124             fmts, subs = self._extract_variant_formats(
 125                 video_variant.attrib, video_id)
 126             formats.extend(fmts)
 127             subtitles = self._merge_subtitles(subtitles, subs)
 128         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
 129         if video_url not in urls:
 130             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
 131             formats.extend(fmts)
 132             subtitles = self._merge_subtitles(subtitles, subs)
 133         return formats, subtitles
 134
 135     @staticmethod
 136     def _search_dimensions_in_video_url(a_format, video_url):
 137         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
 138         if m:
 139             a_format.update({
 140                 'width': int(m.group('width')),
 141                 'height': int(m.group('height')),
 142             })
 143
 144     @property
 145     def is_logged_in(self):
 146         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
 147
 148     def _fetch_guest_token(self, headers, display_id):
 149         headers.pop('x-guest-token', None)
 150         self._guest_token = traverse_obj(self._download_json(
 151             f'{self._API_BASE}guest/activate.json', display_id,
 152             'Downloading guest token', data=b'', headers=headers), 'guest_token')
 153         if not self._guest_token:
 154             raise ExtractorError('Could not retrieve guest token')
 155
 156     def _set_base_headers(self):
 157         headers = self._AUTH.copy()
 158         csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
 159         if csrf_token:
 160             headers['x-csrf-token'] = csrf_token
 161         return headers
 162
 163     def _call_login_api(self, note, headers, query={}, data=None):
 164         response = self._download_json(
 165             f'{self._API_BASE}onboarding/task.json', None, note,
 166             headers=headers, query=query, data=data, expected_status=400)
 167         error = traverse_obj(response, ('errors', 0, 'message', {str}))
 168         if error:
 169             raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
 170         elif traverse_obj(response, 'status') != 'success':
 171             raise ExtractorError('Login was unsuccessful')
 172
 173         subtask = traverse_obj(
 174             response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
 175         if not subtask:
 176             raise ExtractorError('Twitter API did not return next login subtask')
 177
 178         self._flow_token = response['flow_token']
 179
 180         return subtask
 181
 182     def _perform_login(self, username, password):
 183         if self.is_logged_in:
 184             return
 185
 186         self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
 187         headers = self._set_base_headers()
 188         self._fetch_guest_token(headers, None)
 189         headers.update({
 190             'content-type': 'application/json',
 191             'x-guest-token': self._guest_token,
 192             'x-twitter-client-language': 'en',
 193             'x-twitter-active-user': 'yes',
 194             'Referer': 'https://twitter.com/',
 195             'Origin': 'https://twitter.com',
 196         })
 197
 198         def build_login_json(*subtask_inputs):
 199             return json.dumps({
 200                 'flow_token': self._flow_token,
 201                 'subtask_inputs': subtask_inputs
 202             }, separators=(',', ':')).encode()
 203
 204         def input_dict(subtask_id, text):
 205             return {
 206                 'subtask_id': subtask_id,
 207                 'enter_text': {
 208                     'text': text,
 209                     'link': 'next_link'
 210                 }
 211             }
 212
 213         next_subtask = self._call_login_api(
 214             'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
 215
 216         while not self.is_logged_in:
 217             if next_subtask == 'LoginJsInstrumentationSubtask':
 218                 next_subtask = self._call_login_api(
 219                     'Submitting JS instrumentation response', headers, data=build_login_json({
 220                         'subtask_id': next_subtask,
 221                         'js_instrumentation': {
 222                             'response': '{}',
 223                             'link': 'next_link'
 224                         }
 225                     }))
 226
 227             elif next_subtask == 'LoginEnterUserIdentifierSSO':
 228                 next_subtask = self._call_login_api(
 229                     'Submitting username', headers, data=build_login_json({
 230                         'subtask_id': next_subtask,
 231                         'settings_list': {
 232                             'setting_responses': [{
 233                                 'key': 'user_identifier',
 234                                 'response_data': {
 235                                     'text_data': {
 236                                         'result': username
 237                                     }
 238                                 }
 239                             }],
 240                             'link': 'next_link'
 241                         }
 242                     }))
 243
 244             elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
 245                 next_subtask = self._call_login_api(
 246                     'Submitting alternate identifier', headers,
 247                     data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
 248                         'one of username, phone number or email that was not used as --username'))))
 249
 250             elif next_subtask == 'LoginEnterPassword':
 251                 next_subtask = self._call_login_api(
 252                     'Submitting password', headers, data=build_login_json({
 253                         'subtask_id': next_subtask,
 254                         'enter_password': {
 255                             'password': password,
 256                             'link': 'next_link'
 257                         }
 258                     }))
 259
 260             elif next_subtask == 'AccountDuplicationCheck':
 261                 next_subtask = self._call_login_api(
 262                     'Submitting account duplication check', headers, data=build_login_json({
 263                         'subtask_id': next_subtask,
 264                         'check_logged_in_account': {
 265                             'link': 'AccountDuplicationCheck_false'
 266                         }
 267                     }))
 268
 269             elif next_subtask == 'LoginTwoFactorAuthChallenge':
 270                 next_subtask = self._call_login_api(
 271                     'Submitting 2FA token', headers, data=build_login_json(input_dict(
 272                         next_subtask, self._get_tfa_info('two-factor authentication token'))))
 273
 274             elif next_subtask == 'LoginAcid':
 275                 next_subtask = self._call_login_api(
 276                     'Submitting confirmation code', headers, data=build_login_json(input_dict(
 277                         next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
 278
 279             elif next_subtask == 'LoginSuccessSubtask':
 280                 raise ExtractorError('Twitter API did not grant auth token cookie')
 281
 282             else:
 283                 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
 284
 285         self.report_login()
 286
 287     def _call_api(self, path, video_id, query={}, graphql=False):
 288         headers = self._set_base_headers()
 289         if self.is_logged_in:
 290             headers.update({
 291                 'x-twitter-auth-type': 'OAuth2Session',
 292                 'x-twitter-client-language': 'en',
 293                 'x-twitter-active-user': 'yes',
 294             })
 295
 296         for first_attempt in (True, False):
 297             if not self.is_logged_in:
 298                 if not self._guest_token:
 299                     self._fetch_guest_token(headers, video_id)
 300                 headers['x-guest-token'] = self._guest_token
 301
 302             allowed_status = {400, 401, 403, 404} if graphql else {403}
 303             result = self._download_json(
 304                 (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 305                 video_id, headers=headers, query=query, expected_status=allowed_status,
 306                 note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 307
 308             if result.get('errors'):
 309                 errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 310                 if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
 311                     self.to_screen('Guest token has expired. Refreshing guest token')
 312                     self._guest_token = None
 313                     continue
 314
 315                 raise ExtractorError(
 316                     f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
 317
 318             return result
 319
 320     def _build_graphql_query(self, media_id):
 321         raise NotImplementedError('Method must be implemented to support GraphQL')
 322
 323     def _call_graphql_api(self, endpoint, media_id):
 324         data = self._build_graphql_query(media_id)
 325         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 326         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 327
 328
 329 class TwitterCardIE(InfoExtractor):
 330     IE_NAME = 'twitter:card'
 331     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 332     _TESTS = [
 333         {
 334             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 335             # MD5 checksums are different in different places
 336             'info_dict': {
 337                 'id': '560070131976392705',
 338                 'ext': 'mp4',
 339                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 340                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 341                 'uploader': 'Twitter',
 342                 'uploader_id': 'Twitter',
 343                 'thumbnail': r're:^https?://.*\.jpg',
 344                 'duration': 30.033,
 345                 'timestamp': 1422366112,
 346                 'upload_date': '20150127',
 347                 'age_limit': 0,
 348                 'comment_count': int,
 349                 'tags': [],
 350                 'repost_count': int,
 351                 'like_count': int,
 352                 'display_id': '560070183650213889',
 353                 'uploader_url': 'https://twitter.com/Twitter',
 354             },
 355         },
 356         {
 357             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 358             'md5': '7137eca597f72b9abbe61e5ae0161399',
 359             'info_dict': {
 360                 'id': '623160978427936768',
 361                 'ext': 'mp4',
 362                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 363                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 364                 'uploader': 'NASA',
 365                 'uploader_id': 'NASA',
 366                 'timestamp': 1437408129,
 367                 'upload_date': '20150720',
 368                 'uploader_url': 'https://twitter.com/NASA',
 369                 'age_limit': 0,
 370                 'comment_count': int,
 371                 'like_count': int,
 372                 'repost_count': int,
 373                 'tags': ['PlutoFlyby'],
 374             },
 375             'params': {'format': '[protocol=https]'}
 376         },
 377         {
 378             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 379             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 380             'info_dict': {
 381                 'id': 'dq4Oj5quskI',
 382                 'ext': 'mp4',
 383                 'title': 'Ubuntu 11.10 Overview',
 384                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 385                 'upload_date': '20111013',
 386                 'uploader': 'OMG! UBUNTU!',
 387                 'uploader_id': 'omgubuntu',
 388                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 389                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 390                 'channel_follower_count': int,
 391                 'chapters': 'count:8',
 392                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 393                 'duration': 138,
 394                 'categories': ['Film & Animation'],
 395                 'age_limit': 0,
 396                 'comment_count': int,
 397                 'availability': 'public',
 398                 'like_count': int,
 399                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 400                 'view_count': int,
 401                 'tags': 'count:12',
 402                 'channel': 'OMG! UBUNTU!',
 403                 'playable_in_embed': True,
 404             },
 405             'add_ie': ['Youtube'],
 406         },
 407         {
 408             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 409             'info_dict': {
 410                 'id': 'iBb2x00UVlv',
 411                 'ext': 'mp4',
 412                 'upload_date': '20151113',
 413                 'uploader_id': '1189339351084113920',
 414                 'uploader': 'ArsenalTerje',
 415                 'title': 'Vine by ArsenalTerje',
 416                 'timestamp': 1447451307,
 417                 'alt_title': 'Vine by ArsenalTerje',
 418                 'comment_count': int,
 419                 'like_count': int,
 420                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 421                 'view_count': int,
 422                 'repost_count': int,
 423             },
 424             'add_ie': ['Vine'],
 425             'params': {'skip_download': 'm3u8'},
 426         },
 427         {
 428             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 429             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 430             'info_dict': {
 431                 'id': '705235433198714880',
 432                 'ext': 'mp4',
 433                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 434                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 435                 'uploader': 'Brent Yarina',
 436                 'uploader_id': 'BTNBrentYarina',
 437                 'timestamp': 1456976204,
 438                 'upload_date': '20160303',
 439             },
 440             'skip': 'This content is no longer available.',
 441         },
 442         {
 443             'url': 'https://twitter.com/i/videos/752274308186120192',
 444             'only_matching': True,
 445         },
 446     ]
 447
 448     def _real_extract(self, url):
 449         status_id = self._match_id(url)
 450         return self.url_result(
 451             'https://twitter.com/statuses/' + status_id,
 452             TwitterIE.ie_key(), status_id)
 453
 454
 455 class TwitterIE(TwitterBaseIE):
 456     IE_NAME = 'twitter'
 457     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 458
 459     _TESTS = [{
 460         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 461         'info_dict': {
 462             'id': '643211870443208704',
 463             'display_id': '643211948184596480',
 464             'ext': 'mp4',
 465             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 466             'thumbnail': r're:^https?://.*\.jpg',
 467             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 468             'uploader': 'FREE THE NIPPLE',
 469             'uploader_id': 'freethenipple',
 470             'duration': 12.922,
 471             'timestamp': 1442188653,
 472             'upload_date': '20150913',
 473             'uploader_url': 'https://twitter.com/freethenipple',
 474             'comment_count': int,
 475             'repost_count': int,
 476             'like_count': int,
 477             'view_count': int,
 478             'tags': [],
 479             'age_limit': 18,
 480         },
 481     }, {
 482         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 483         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 484         'info_dict': {
 485             'id': '657991469417025536',
 486             'ext': 'mp4',
 487             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 488             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 489             'thumbnail': r're:^https?://.*\.png',
 490             'uploader': 'Gifs',
 491             'uploader_id': 'giphz',
 492         },
 493         'expected_warnings': ['height', 'width'],
 494         'skip': 'Account suspended',
 495     }, {
 496         'url': 'https://twitter.com/starwars/status/665052190608723968',
 497         'info_dict': {
 498             'id': '665052190608723968',
 499             'display_id': '665052190608723968',
 500             'ext': 'mp4',
 501             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 502             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 503             'uploader_id': 'starwars',
 504             'uploader': r're:Star Wars.*',
 505             'timestamp': 1447395772,
 506             'upload_date': '20151113',
 507             'uploader_url': 'https://twitter.com/starwars',
 508             'comment_count': int,
 509             'repost_count': int,
 510             'like_count': int,
 511             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 512             'age_limit': 0,
 513         },
 514     }, {
 515         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 516         'info_dict': {
 517             'id': '705235433198714880',
 518             'ext': 'mp4',
 519             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 520             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 521             'uploader_id': 'BTNBrentYarina',
 522             'uploader': 'Brent Yarina',
 523             'timestamp': 1456976204,
 524             'upload_date': '20160303',
 525             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 526             'comment_count': int,
 527             'repost_count': int,
 528             'like_count': int,
 529             'tags': [],
 530             'age_limit': 0,
 531         },
 532         'params': {
 533             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 534             # Test case of TwitterCardIE
 535             'skip_download': True,
 536         },
 537         'skip': 'Dead external link',
 538     }, {
 539         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 540         'info_dict': {
 541             'id': '700207414000242688',
 542             'display_id': '700207533655363584',
 543             'ext': 'mp4',
 544             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 545             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 546             'thumbnail': r're:^https?://.*\.jpg',
 547             'uploader': 'jaydin donte geer',
 548             'uploader_id': 'jaydingeer',
 549             'duration': 30.0,
 550             'timestamp': 1455777459,
 551             'upload_date': '20160218',
 552             'uploader_url': 'https://twitter.com/jaydingeer',
 553             'comment_count': int,
 554             'repost_count': int,
 555             'like_count': int,
 556             'view_count': int,
 557             'tags': ['Damndaniel'],
 558             'age_limit': 0,
 559         },
 560     }, {
 561         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 562         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 563         'info_dict': {
 564             'id': 'MIOxnrUteUd',
 565             'ext': 'mp4',
 566             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 567             'uploader': 'TAKUMA',
 568             'uploader_id': '1004126642786242560',
 569             'timestamp': 1402826626,
 570             'upload_date': '20140615',
 571             'thumbnail': r're:^https?://.*\.jpg',
 572             'alt_title': 'Vine by TAKUMA',
 573             'comment_count': int,
 574             'repost_count': int,
 575             'like_count': int,
 576             'view_count': int,
 577         },
 578         'add_ie': ['Vine'],
 579     }, {
 580         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 581         'info_dict': {
 582             'id': '717462543795523584',
 583             'display_id': '719944021058060289',
 584             'ext': 'mp4',
 585             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 586             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 587             'uploader_id': 'CaptainAmerica',
 588             'uploader': 'Captain America',
 589             'duration': 3.17,
 590             'timestamp': 1460483005,
 591             'upload_date': '20160412',
 592             'uploader_url': 'https://twitter.com/CaptainAmerica',
 593             'thumbnail': r're:^https?://.*\.jpg',
 594             'comment_count': int,
 595             'repost_count': int,
 596             'like_count': int,
 597             'view_count': int,
 598             'tags': [],
 599             'age_limit': 0,
 600         },
 601     }, {
 602         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 603         'info_dict': {
 604             'id': '1zqKVVlkqLaKB',
 605             'ext': 'mp4',
 606             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 607             'upload_date': '20160923',
 608             'uploader_id': '1PmKqpJdOJQoY',
 609             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 610             'timestamp': 1474613214,
 611             'thumbnail': r're:^https?://.*\.jpg',
 612         },
 613         'add_ie': ['Periscope'],
 614     }, {
 615         # has mp4 formats via mobile API
 616         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 617         'info_dict': {
 618             'id': '852138619213144067',
 619             'ext': 'mp4',
 620             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 621             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 622             'uploader': 'عالم الأخبار',
 623             'uploader_id': 'news_al3alm',
 624             'duration': 277.4,
 625             'timestamp': 1492000653,
 626             'upload_date': '20170412',
 627         },
 628         'skip': 'Account suspended',
 629     }, {
 630         'url': 'https://twitter.com/i/web/status/910031516746514432',
 631         'info_dict': {
 632             'id': '910030238373089285',
 633             'display_id': '910031516746514432',
 634             'ext': 'mp4',
 635             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 636             'thumbnail': r're:^https?://.*\.jpg',
 637             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 638             'uploader': 'Préfet de Guadeloupe',
 639             'uploader_id': 'Prefet971',
 640             'duration': 47.48,
 641             'timestamp': 1505803395,
 642             'upload_date': '20170919',
 643             'uploader_url': 'https://twitter.com/Prefet971',
 644             'comment_count': int,
 645             'repost_count': int,
 646             'like_count': int,
 647             'view_count': int,
 648             'tags': ['Maria'],
 649             'age_limit': 0,
 650         },
 651         'params': {
 652             'skip_download': True,  # requires ffmpeg
 653         },
 654     }, {
 655         # card via api.twitter.com/1.1/videos/tweet/config
 656         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 657         'info_dict': {
 658             'id': '1001551417340022785',
 659             'display_id': '1001551623938805763',
 660             'ext': 'mp4',
 661             'title': 're:.*?Shep is on a roll today.*?',
 662             'thumbnail': r're:^https?://.*\.jpg',
 663             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 664             'uploader': 'Lis Power',
 665             'uploader_id': 'LisPower1',
 666             'duration': 111.278,
 667             'timestamp': 1527623489,
 668             'upload_date': '20180529',
 669             'uploader_url': 'https://twitter.com/LisPower1',
 670             'comment_count': int,
 671             'repost_count': int,
 672             'like_count': int,
 673             'view_count': int,
 674             'tags': [],
 675             'age_limit': 0,
 676         },
 677         'params': {
 678             'skip_download': True,  # requires ffmpeg
 679         },
 680     }, {
 681         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 682         'info_dict': {
 683             'id': '1087791272830607360',
 684             'display_id': '1087791357756956680',
 685             'ext': 'mp4',
 686             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 687             'thumbnail': r're:^https?://.*\.jpg',
 688             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 689             'uploader': 'Twitter',
 690             'uploader_id': 'Twitter',
 691             'duration': 61.567,
 692             'timestamp': 1548184644,
 693             'upload_date': '20190122',
 694             'uploader_url': 'https://twitter.com/Twitter',
 695             'comment_count': int,
 696             'repost_count': int,
 697             'like_count': int,
 698             'view_count': int,
 699             'tags': [],
 700             'age_limit': 0,
 701         },
 702     }, {
 703         # not available in Periscope
 704         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 705         'info_dict': {
 706             'id': '1vOGwqejwoWxB',
 707             'ext': 'mp4',
 708             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 709             'uploader': 'Vivi',
 710             'uploader_id': '1eVjYOLGkGrQL',
 711             'thumbnail': r're:^https?://.*\.jpg',
 712             'tags': ['EduTECH2019'],
 713             'view_count': int,
 714         },
 715         'add_ie': ['TwitterBroadcast'],
 716     }, {
 717         # unified card
 718         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 719         'info_dict': {
 720             'id': '1349774757969989634',
 721             'display_id': '1349794411333394432',
 722             'ext': 'mp4',
 723             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 724             'thumbnail': r're:^https?://.*\.jpg',
 725             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 726             'uploader': 'Brooklyn Nets',
 727             'uploader_id': 'BrooklynNets',
 728             'duration': 324.484,
 729             'timestamp': 1610651040,
 730             'upload_date': '20210114',
 731             'uploader_url': 'https://twitter.com/BrooklynNets',
 732             'comment_count': int,
 733             'repost_count': int,
 734             'like_count': int,
 735             'tags': [],
 736             'age_limit': 0,
 737         },
 738         'params': {
 739             'skip_download': True,
 740         },
 741     }, {
 742         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 743         'info_dict': {
 744             'id': '1577855447914409984',
 745             'display_id': '1577855540407197696',
 746             'ext': 'mp4',
 747             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 748             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 749             'upload_date': '20221006',
 750             'uploader': 'oshtru',
 751             'uploader_id': 'oshtru',
 752             'uploader_url': 'https://twitter.com/oshtru',
 753             'thumbnail': r're:^https?://.*\.jpg',
 754             'duration': 30.03,
 755             'timestamp': 1665025050,
 756             'comment_count': int,
 757             'repost_count': int,
 758             'like_count': int,
 759             'view_count': int,
 760             'tags': [],
 761             'age_limit': 0,
 762         },
 763         'params': {'skip_download': True},
 764     }, {
 765         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 766         'info_dict': {
 767             'id': '1577719286659006464',
 768             'title': 'Ultima | #\u0432\u029f\u043c - Test',
 769             'description': 'Test https://t.co/Y3KEZD7Dad',
 770             'uploader': 'Ultima | #\u0432\u029f\u043c',
 771             'uploader_id': 'UltimaShadowX',
 772             'uploader_url': 'https://twitter.com/UltimaShadowX',
 773             'upload_date': '20221005',
 774             'timestamp': 1664992565,
 775             'comment_count': int,
 776             'repost_count': int,
 777             'like_count': int,
 778             'tags': [],
 779             'age_limit': 0,
 780         },
 781         'playlist_count': 4,
 782         'params': {'skip_download': True},
 783     }, {
 784         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 785         'info_dict': {
 786             'id': '1575559336759263233',
 787             'display_id': '1575560063510810624',
 788             'ext': 'mp4',
 789             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 790             'thumbnail': r're:^https?://.*\.jpg',
 791             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 792             'uploader': 'Max Olson',
 793             'uploader_id': 'MesoMax919',
 794             'uploader_url': 'https://twitter.com/MesoMax919',
 795             'duration': 21.321,
 796             'timestamp': 1664477766,
 797             'upload_date': '20220929',
 798             'comment_count': int,
 799             'repost_count': int,
 800             'like_count': int,
 801             'view_count': int,
 802             'tags': ['HurricaneIan'],
 803             'age_limit': 0,
 804         },
 805     }, {
 806         # Adult content, fails if not logged in (GraphQL)
 807         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 808         'info_dict': {
 809             'id': '1575199163847000068',
 810             'display_id': '1575199173472927762',
 811             'ext': 'mp4',
 812             'title': str,
 813             'description': str,
 814             'uploader': str,
 815             'uploader_id': 'Rizdraws',
 816             'uploader_url': 'https://twitter.com/Rizdraws',
 817             'upload_date': '20220928',
 818             'timestamp': 1664391723,
 819             'thumbnail': r're:^https?://.+\.jpg',
 820             'like_count': int,
 821             'repost_count': int,
 822             'comment_count': int,
 823             'age_limit': 18,
 824             'tags': []
 825         },
 826         'skip': 'Requires authentication',
 827     }, {
 828         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 829         'playlist_mincount': 2,
 830         'info_dict': {
 831             'id': '1395079556562706435',
 832             'title': str,
 833             'tags': [],
 834             'uploader': str,
 835             'like_count': int,
 836             'upload_date': '20210519',
 837             'age_limit': 0,
 838             'repost_count': int,
 839             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 840             'uploader_id': 'Srirachachau',
 841             'comment_count': int,
 842             'uploader_url': 'https://twitter.com/Srirachachau',
 843             'timestamp': 1621447860,
 844         },
 845     }, {
 846         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 847         'playlist_mincount': 2,
 848         'info_dict': {
 849             'id': '1578353380363501568',
 850             'title': str,
 851             'uploader_id': 'DavidToons_',
 852             'repost_count': int,
 853             'like_count': int,
 854             'uploader': str,
 855             'timestamp': 1665143744,
 856             'uploader_url': 'https://twitter.com/DavidToons_',
 857             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 858             'tags': [],
 859             'comment_count': int,
 860             'upload_date': '20221007',
 861             'age_limit': 0,
 862         },
 863     }, {
 864         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 865         'playlist_count': 2,
 866         'info_dict': {
 867             'id': '1578401165338976258',
 868             'title': str,
 869             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 870             'uploader': str,
 871             'uploader_id': 'primevideouk',
 872             'timestamp': 1665155137,
 873             'upload_date': '20221007',
 874             'age_limit': 0,
 875             'uploader_url': 'https://twitter.com/primevideouk',
 876             'comment_count': int,
 877             'repost_count': int,
 878             'like_count': int,
 879             'tags': ['TheRingsOfPower'],
 880         },
 881     }, {
 882         # Twitter Spaces
 883         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 884         'info_dict': {
 885             'id': '1lPJqmBeeNAJb',
 886             'ext': 'm4a',
 887             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 888             'uploader': r're:Monique Camarra.+?',
 889             'uploader_id': 'MoniqueCamarra',
 890             'live_status': 'was_live',
 891             'release_timestamp': 1658417414,
 892             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 893             'timestamp': 1658407771464,
 894         },
 895         'add_ie': ['TwitterSpaces'],
 896         'params': {'skip_download': 'm3u8'},
 897     }, {
 898         # URL specifies video number but --yes-playlist
 899         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 900         'playlist_mincount': 2,
 901         'info_dict': {
 902             'id': '1600649710662213632',
 903             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 904             'timestamp': 1670459604.0,
 905             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 906             'comment_count': int,
 907             'uploader_id': 'CTVJLaidlaw',
 908             'repost_count': int,
 909             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 910             'upload_date': '20221208',
 911             'age_limit': 0,
 912             'uploader': 'Jocelyn Laidlaw',
 913             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 914             'like_count': int,
 915         },
 916     }, {
 917         # URL specifies video number and --no-playlist
 918         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 919         'info_dict': {
 920             'id': '1600649511827013632',
 921             'ext': 'mp4',
 922             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 923             'thumbnail': r're:^https?://.+\.jpg',
 924             'timestamp': 1670459604.0,
 925             'uploader_id': 'CTVJLaidlaw',
 926             'uploader': 'Jocelyn Laidlaw',
 927             'repost_count': int,
 928             'comment_count': int,
 929             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 930             'duration': 102.226,
 931             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 932             'display_id': '1600649710662213632',
 933             'like_count': int,
 934             'view_count': int,
 935             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 936             'upload_date': '20221208',
 937             'age_limit': 0,
 938         },
 939         'params': {'noplaylist': True},
 940     }, {
        # The id points to a TweetWithVisibilityResults entity, which wraps the actual Tweet;
        # note that the extracted id differs from the id in the url
 943         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 944         'info_dict': {
 945             'id': '1621117577354424321',
 946             'display_id': '1621117700482416640',
 947             'ext': 'mp4',
 948             'title': '뽀 - 아 최우제 이동속도 봐',
 949             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 950             'duration': 24.598,
 951             'uploader': '뽀',
 952             'uploader_id': 's2FAKER',
 953             'uploader_url': 'https://twitter.com/s2FAKER',
 954             'upload_date': '20230202',
 955             'timestamp': 1675339553.0,
 956             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
 957             'age_limit': 18,
 958             'tags': [],
 959             'like_count': int,
 960             'repost_count': int,
 961             'comment_count': int,
 962             'view_count': int,
 963         },
 964     }, {
 965         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
 966         'info_dict': {
 967             'id': '1599108643743473680',
 968             'display_id': '1599108751385972737',
 969             'ext': 'mp4',
 970             'title': '\u06ea - \U0001F48B',
 971             'uploader_url': 'https://twitter.com/hlo_again',
 972             'like_count': int,
 973             'uploader_id': 'hlo_again',
 974             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
 975             'repost_count': int,
 976             'duration': 9.531,
 977             'comment_count': int,
 978             'view_count': int,
 979             'upload_date': '20221203',
 980             'age_limit': 0,
 981             'timestamp': 1670092210.0,
 982             'tags': [],
 983             'uploader': '\u06ea',
 984             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
 985         },
 986         'params': {'noplaylist': True},
 987     }, {
 988         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
 989         'info_dict': {
 990             'id': '1600009362759733248',
 991             'display_id': '1600009574919962625',
 992             'ext': 'mp4',
 993             'uploader_url': 'https://twitter.com/MunTheShinobi',
 994             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
 995             'view_count': int,
 996             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
 997             'age_limit': 0,
 998             'uploader': 'Mün The Shinobi',
 999             'repost_count': int,
1000             'upload_date': '20221206',
1001             'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1002             'comment_count': int,
1003             'like_count': int,
1004             'tags': [],
1005             'uploader_id': 'MunTheShinobi',
1006             'duration': 139.987,
1007             'timestamp': 1670306984.0,
1008         },
1009     }, {
1010         # url to retweet id, legacy API
1011         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1012         'info_dict': {
1013             'id': '1623274794488659969',
1014             'display_id': '1623739803874349067',
1015             'ext': 'mp4',
1016             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
1017             'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
1018             'uploader': 'Johnny Bullets',
1019             'uploader_id': 'Johnnybull3ts',
1020             'uploader_url': 'https://twitter.com/Johnnybull3ts',
1021             'age_limit': 0,
1022             'tags': [],
1023             'duration': 8.033,
1024             'timestamp': 1675853859.0,
1025             'upload_date': '20230208',
1026             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1027             'like_count': int,
1028             'repost_count': int,
1029             'comment_count': int,
1030         },
1031         'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
1032     }, {
1033         # onion route
1034         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1035         'only_matching': True,
1036     }, {
1037         # Twitch Clip Embed
1038         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1039         'only_matching': True,
1040     }, {
1041         # promo_video_website card
1042         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1043         'only_matching': True,
1044     }, {
1045         # promo_video_convo card
1046         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1047         'only_matching': True,
1048     }, {
1049         # appplayer card
1050         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1051         'only_matching': True,
1052     }, {
1053         # video_direct_message card
1054         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1055         'only_matching': True,
1056     }, {
1057         # poll2choice_video card
1058         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1059         'only_matching': True,
1060     }, {
1061         # poll3choice_video card
1062         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1063         'only_matching': True,
1064     }, {
1065         # poll4choice_video card
1066         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1067         'only_matching': True,
1068     }]
1069
1070     def _graphql_to_legacy(self, data, twid):
1071         result = traverse_obj(data, (
1072             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1073             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
1074             'tweet_results', 'result', ('tweet', None),
1075         ), expected_type=dict, default={}, get_all=False)
1076
1077         if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
1078             self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
1079
1080         if 'tombstone' in result:
1081             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
1082             if cause and 'adult content' in cause:
1083                 self.raise_login_required(cause)
1084             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
1085
1086         status = result.get('legacy', {})
1087         status.update(traverse_obj(result, {
1088             'user': ('core', 'user_results', 'result', 'legacy'),
1089             'card': ('card', 'legacy'),
1090             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1091         }, expected_type=dict, default={}))
1092
1093         # extra transformation is needed since result does not match legacy format
1094         binding_values = {
1095             binding_value.get('key'): binding_value.get('value')
1096             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
1097         }
1098         if binding_values:
1099             status['card']['binding_values'] = binding_values
1100
1101         return status
1102
1103     def _build_graphql_query(self, media_id):
1104         return {
1105             'variables': {
1106                 'focalTweetId': media_id,
1107                 'includePromotedContent': True,
1108                 'with_rux_injections': False,
1109                 'withBirdwatchNotes': True,
1110                 'withCommunity': True,
1111                 'withDownvotePerspective': False,
1112                 'withQuickPromoteEligibilityTweetFields': True,
1113                 'withReactionsMetadata': False,
1114                 'withReactionsPerspective': False,
1115                 'withSuperFollowsTweetFields': True,
1116                 'withSuperFollowsUserFields': True,
1117                 'withV2Timeline': True,
1118                 'withVoice': True,
1119             },
1120             'features': {
1121                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1122                 'interactive_text_enabled': True,
1123                 'responsive_web_edit_tweet_api_enabled': True,
1124                 'responsive_web_enhance_cards_enabled': True,
1125                 'responsive_web_graphql_timeline_navigation_enabled': False,
1126                 'responsive_web_text_conversations_enabled': False,
1127                 'responsive_web_uc_gql_enabled': True,
1128                 'standardized_nudges_misinfo': True,
1129                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1130                 'tweetypie_unmention_optimization_enabled': True,
1131                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1132                 'verified_phone_label_enabled': False,
1133                 'vibe_api_enabled': True,
1134             },
1135         }
1136
1137     def _real_extract(self, url):
1138         twid, selected_index = self._match_valid_url(url).group('id', 'index')
1139         if self._configuration_arg('legacy_api') and not self.is_logged_in:
1140             status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
1141                 'cards_platform': 'Web-12',
1142                 'include_cards': 1,
1143                 'include_reply_count': 1,
1144                 'include_user_entities': 0,
1145                 'tweet_mode': 'extended',
1146             }), 'retweeted_status', None)
1147         else:
1148             result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
1149             status = self._graphql_to_legacy(result, twid)
1150
1151         title = description = status['full_text'].replace('\n', ' ')
1152         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
1153         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
1154         user = status.get('user') or {}
1155         uploader = user.get('name')
1156         if uploader:
1157             title = f'{uploader} - {title}'
1158         uploader_id = user.get('screen_name')
1159
1160         info = {
1161             'id': twid,
1162             'title': title,
1163             'description': description,
1164             'uploader': uploader,
1165             'timestamp': unified_timestamp(status.get('created_at')),
1166             'uploader_id': uploader_id,
1167             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
1168             'like_count': int_or_none(status.get('favorite_count')),
1169             'repost_count': int_or_none(status.get('retweet_count')),
1170             'comment_count': int_or_none(status.get('reply_count')),
1171             'age_limit': 18 if status.get('possibly_sensitive') else 0,
1172             'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
1173         }
1174
1175         def extract_from_video_info(media):
1176             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
1177             self.write_debug(f'Extracting from video info: {media_id}')
1178             video_info = media.get('video_info') or {}
1179
1180             formats = []
1181             subtitles = {}
1182             for variant in video_info.get('variants', []):
1183                 fmts, subs = self._extract_variant_formats(variant, twid)
1184                 subtitles = self._merge_subtitles(subtitles, subs)
1185                 formats.extend(fmts)
1186
1187             thumbnails = []
1188             media_url = media.get('media_url_https') or media.get('media_url')
1189             if media_url:
1190                 def add_thumbnail(name, size):
1191                     thumbnails.append({
1192                         'id': name,
1193                         'url': update_url_query(media_url, {'name': name}),
1194                         'width': int_or_none(size.get('w') or size.get('width')),
1195                         'height': int_or_none(size.get('h') or size.get('height')),
1196                     })
1197                 for name, size in media.get('sizes', {}).items():
1198                     add_thumbnail(name, size)
1199                 add_thumbnail('orig', media.get('original_info') or {})
1200
1201             return {
1202                 'id': media_id,
1203                 'formats': formats,
1204                 'subtitles': subtitles,
1205                 'thumbnails': thumbnails,
1206                 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
1207                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
1208                 # The codec of http formats are unknown
1209                 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
1210             }
1211
1212         def extract_from_card_info(card):
1213             if not card:
1214                 return
1215
1216             self.write_debug(f'Extracting from card info: {card.get("url")}')
1217             binding_values = card['binding_values']
1218
1219             def get_binding_value(k):
1220                 o = binding_values.get(k) or {}
1221                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1222
1223             card_name = card['name'].split(':')[-1]
1224             if card_name == 'player':
1225                 yield {
1226                     '_type': 'url',
1227                     'url': get_binding_value('player_url'),
1228                 }
1229             elif card_name == 'periscope_broadcast':
1230                 yield {
1231                     '_type': 'url',
1232                     'url': get_binding_value('url') or get_binding_value('player_url'),
1233                     'ie_key': PeriscopeIE.ie_key(),
1234                 }
1235             elif card_name == 'broadcast':
1236                 yield {
1237                     '_type': 'url',
1238                     'url': get_binding_value('broadcast_url'),
1239                     'ie_key': TwitterBroadcastIE.ie_key(),
1240                 }
1241             elif card_name == 'audiospace':
1242                 yield {
1243                     '_type': 'url',
1244                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1245                     'ie_key': TwitterSpacesIE.ie_key(),
1246                 }
1247             elif card_name == 'summary':
1248                 yield {
1249                     '_type': 'url',
1250                     'url': get_binding_value('card_url'),
1251                 }
1252             elif card_name == 'unified_card':
1253                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1254                 yield from map(extract_from_video_info, traverse_obj(
1255                     unified_card, ('media_entities', ...), expected_type=dict))
1256             # amplify, promo_video_website, promo_video_convo, appplayer,
1257             # video_direct_message, poll2choice_video, poll3choice_video,
1258             # poll4choice_video, ...
1259             else:
1260                 is_amplify = card_name == 'amplify'
1261                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1262                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1263                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1264
1265                 thumbnails = []
1266                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1267                     image = get_binding_value('player_image' + suffix) or {}
1268                     image_url = image.get('url')
1269                     if not image_url or '/player-placeholder' in image_url:
1270                         continue
1271                     thumbnails.append({
1272                         'id': suffix[1:] if suffix else 'medium',
1273                         'url': image_url,
1274                         'width': int_or_none(image.get('width')),
1275                         'height': int_or_none(image.get('height')),
1276                     })
1277
1278                 yield {
1279                     'formats': formats,
1280                     'subtitles': subtitles,
1281                     'thumbnails': thumbnails,
1282                     'duration': int_or_none(get_binding_value(
1283                         'content_duration_seconds')),
1284                 }
1285
1286         videos = traverse_obj(status, (
1287             (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
1288
1289         if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1290             selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1291         else:
1292             desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
1293             if not desired_obj:
1294                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1295             elif desired_obj.get('type') != 'video':
1296                 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1297
1298             # Restore original archive id and video index in title
1299             for index, entry in enumerate(videos, 1):
1300                 if entry.get('id') != desired_obj.get('id'):
1301                     continue
1302                 if index == 1:
1303                     info['_old_archive_ids'] = [make_archive_id(self, twid)]
1304                 if len(videos) != 1:
1305                     info['title'] += f' #{index}'
1306                 break
1307
1308             return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1309
1310         entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
1311         if not entries:
1312             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1313             if not expanded_url or expanded_url == url:
1314                 self.raise_no_formats('No video could be found in this tweet', expected=True)
1315                 return info
1316
1317             return self.url_result(expanded_url, display_id=twid, **info)
1318
1319         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1320
1321         if len(entries) == 1:
1322             return entries[0]
1323
1324         for index, entry in enumerate(entries, 1):
1325             entry['title'] += f' #{index}'
1326
1327         return self.playlist_result(entries, **info)
1328
1329
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for video pages served from ``amp.twimg.com/v/<id>``."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page and build an info dict from its ``twitter:*`` meta tags.

        Formats and subtitles come from the VMAP document referenced by the
        ``twitter:amplify:vmap`` meta tag; the thumbnail and the player
        dimensions are read from further meta tags when present.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        # Keep the subtitles as well, like the card extraction in TwitterIE does
        formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, video_id)

        def _find_dimension(target):
            # Returns (width, height) from the 'twitter:<target>:width/height'
            # meta tags; either value is None when missing or not an integer
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list so that the missing formats are
        # reported downstream instead of failing here with an IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
1385
1386
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for twitter.com/i/broadcasts/<id>. Metadata comes from the
    # Twitter API; parsing and m3u8 handling are delegated to helper methods
    # (presumably inherited from PeriscopeBaseIE -- not visible in this chunk).
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for a broadcast, including its HLS formats.

        Raises ExtractorError (expected) when the API has no data for the
        broadcast, and a geo-restriction error when the playback URL points
        at the geoblocked endpoint.
        """
        broadcast_id = self._match_id(url)
        broadcast = self._call_api(
            'broadcasts/show.json', broadcast_id,
            {'ids': broadcast_id})['broadcasts'][broadcast_id]
        # A null/empty entry would otherwise surface as an opaque TypeError
        # when subscripted below; report it like TwitterSpacesIE does.
        if not broadcast:
            raise ExtractorError('Twitter broadcast not found', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        media_key = broadcast['media_key']
        source = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)['source']
        # Prefer the non-redirecting playback URL, fall back to 'location'
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The optional 'type' query parameter of the playlist URL is used as
        # the m3u8 format id (None when absent)
        m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
            m3u8_url).query).get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1423
1424
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for Twitter Spaces (audio rooms) at twitter.com/i/spaces/<id>.
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            # 'created_at' is reported in milliseconds; exposed in seconds
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased metadata 'state' to the live_status value returned
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the 'variables'/'features' payload for the AudioSpaceById query."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Return the info dict for a Space.

        Formats are only fetched when the Space is neither upcoming nor in
        the "ended but not downloadable yet" state; in those two cases
        ``raise_no_formats`` is called and ``formats`` stays empty.

        Raises ExtractorError (expected) when the API returns no Space data.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # None when 'state' is missing or not a known SPACE_STATUS key
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            # Every format is tagged as audio-only AAC
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            # Scale milliseconds to seconds, consistent with release_timestamp
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1519
1520
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or the internal "tco:<id>" form) to their
    # target URL and hands the result back via url_result().
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so only the literal host "t.co" matches, and the id
    # stops at '?' or '#' so a URL fragment is not taken as part of the id.
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate extraction to the final URL."""
        mobj = self._match_valid_url(url)
        eid, link_id = mobj.group('eid', 'id')
        if eid:
            # "tco:<id>" form: rebuild the real t.co URL from the id
            link_id = eid
            url = self._BASE_URL + link_id
        # NOTE(review): the curl User-Agent presumably makes t.co answer with
        # a plain HTTP redirect -- confirm before changing it.
        new_url = self._request_webpage(url, link_id, headers={'User-Agent': 'curl'}).geturl()

        # Links flagged by Twitter land on a warning page that carries the
        # real target in the 'unsafe_link' query parameter; strip only the
        # leading prefix so the target itself is left untouched.
        # NOTE(review): the remainder is used as-is; if the parameter value is
        # percent-encoded it would need unquoting -- confirm against a live link.
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)