yt_dlp/extractor/twitter.py

   1 import json
   2 import re
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from .periscope import PeriscopeBaseIE, PeriscopeIE
   7 from ..compat import (
   8     compat_parse_qs,
   9     compat_urllib_parse_unquote,
  10     compat_urllib_parse_urlparse,
  11 )
  12 from ..utils import (
  13     ExtractorError,
  14     dict_get,
  15     float_or_none,
  16     format_field,
  17     int_or_none,
  18     make_archive_id,
  19     remove_end,
  20     str_or_none,
  21     strip_or_none,
  22     traverse_obj,
  23     try_call,
  24     try_get,
  25     unified_timestamp,
  26     update_url_query,
  27     url_or_none,
  28     xpath_text,
  29 )
  30
  31
  32 class TwitterBaseIE(InfoExtractor):
  33     _NETRC_MACHINE = 'twitter'
  34     _API_BASE = 'https://api.twitter.com/1.1/'
  35     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
  36     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
  37     _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
  38     _flow_token = None
  39
  40     _LOGIN_INIT_DATA = json.dumps({
  41         'input_flow_data': {
  42             'flow_context': {
  43                 'debug_overrides': {},
  44                 'start_location': {
  45                     'location': 'unknown'
  46                 }
  47             }
  48         },
  49         'subtask_versions': {
  50             'action_list': 2,
  51             'alert_dialog': 1,
  52             'app_download_cta': 1,
  53             'check_logged_in_account': 1,
  54             'choice_selection': 3,
  55             'contacts_live_sync_permission_prompt': 0,
  56             'cta': 7,
  57             'email_verification': 2,
  58             'end_flow': 1,
  59             'enter_date': 1,
  60             'enter_email': 2,
  61             'enter_password': 5,
  62             'enter_phone': 2,
  63             'enter_recaptcha': 1,
  64             'enter_text': 5,
  65             'enter_username': 2,
  66             'generic_urt': 3,
  67             'in_app_notification': 1,
  68             'interest_picker': 3,
  69             'js_instrumentation': 1,
  70             'menu_dialog': 1,
  71             'notifications_permission_prompt': 2,
  72             'open_account': 2,
  73             'open_home_timeline': 1,
  74             'open_link': 1,
  75             'phone_verification': 4,
  76             'privacy_options': 1,
  77             'security_key': 3,
  78             'select_avatar': 4,
  79             'select_banner': 2,
  80             'settings_list': 7,
  81             'show_code': 1,
  82             'sign_up': 2,
  83             'sign_up_review': 4,
  84             'tweet_selection_urt': 1,
  85             'update_users': 1,
  86             'upload_media': 1,
  87             'user_recommendations_list': 4,
  88             'user_recommendations_urt': 1,
  89             'wait_spinner': 3,
  90             'web_modal': 1
  91         }
  92     }, separators=(',', ':')).encode()
  93
  94     def _extract_variant_formats(self, variant, video_id):
  95         variant_url = variant.get('url')
  96         if not variant_url:
  97             return [], {}
  98         elif '.m3u8' in variant_url:
  99             return self._extract_m3u8_formats_and_subtitles(
 100                 variant_url, video_id, 'mp4', 'm3u8_native',
 101                 m3u8_id='hls', fatal=False)
 102         else:
 103             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
 104             f = {
 105                 'url': variant_url,
 106                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
 107                 'tbr': tbr,
 108             }
 109             self._search_dimensions_in_video_url(f, variant_url)
 110             return [f], {}
 111
 112     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
 113         vmap_url = url_or_none(vmap_url)
 114         if not vmap_url:
 115             return [], {}
 116         vmap_data = self._download_xml(vmap_url, video_id)
 117         formats = []
 118         subtitles = {}
 119         urls = []
 120         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
 121             video_variant.attrib['url'] = compat_urllib_parse_unquote(
 122                 video_variant.attrib['url'])
 123             urls.append(video_variant.attrib['url'])
 124             fmts, subs = self._extract_variant_formats(
 125                 video_variant.attrib, video_id)
 126             formats.extend(fmts)
 127             subtitles = self._merge_subtitles(subtitles, subs)
 128         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
 129         if video_url not in urls:
 130             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
 131             formats.extend(fmts)
 132             subtitles = self._merge_subtitles(subtitles, subs)
 133         return formats, subtitles
 134
 135     @staticmethod
 136     def _search_dimensions_in_video_url(a_format, video_url):
 137         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
 138         if m:
 139             a_format.update({
 140                 'width': int(m.group('width')),
 141                 'height': int(m.group('height')),
 142             })
 143
 144     @property
 145     def is_logged_in(self):
 146         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
 147
 148     def _set_base_headers(self):
 149         headers = self._AUTH.copy()
 150         csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
 151         if csrf_token:
 152             headers['x-csrf-token'] = csrf_token
 153         return headers
 154
 155     def _call_login_api(self, note, headers, query={}, data=None):
 156         response = self._download_json(
 157             f'{self._API_BASE}onboarding/task.json', None, note,
 158             headers=headers, query=query, data=data, expected_status=400)
 159         error = traverse_obj(response, ('errors', 0, 'message', {str}))
 160         if error:
 161             raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
 162         elif traverse_obj(response, 'status') != 'success':
 163             raise ExtractorError('Login was unsuccessful')
 164
 165         subtask = traverse_obj(
 166             response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
 167         if not subtask:
 168             raise ExtractorError('Twitter API did not return next login subtask')
 169
 170         self._flow_token = response['flow_token']
 171
 172         return subtask
 173
 174     def _perform_login(self, username, password):
 175         if self.is_logged_in:
 176             return
 177
 178         webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
 179         headers = self._set_base_headers()
 180         guest_token = self._search_regex(
 181             r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._download_json(
 182             f'{self._API_BASE}guest/activate.json', None, 'Downloading guest token',
 183             data=b'', headers=headers)['guest_token']
 184         headers.update({
 185             'content-type': 'application/json',
 186             'x-guest-token': guest_token,
 187             'x-twitter-client-language': 'en',
 188             'x-twitter-active-user': 'yes',
 189             'Referer': 'https://twitter.com/',
 190             'Origin': 'https://twitter.com',
 191         })
 192
 193         def build_login_json(*subtask_inputs):
 194             return json.dumps({
 195                 'flow_token': self._flow_token,
 196                 'subtask_inputs': subtask_inputs
 197             }, separators=(',', ':')).encode()
 198
 199         def input_dict(subtask_id, text):
 200             return {
 201                 'subtask_id': subtask_id,
 202                 'enter_text': {
 203                     'text': text,
 204                     'link': 'next_link'
 205                 }
 206             }
 207
 208         next_subtask = self._call_login_api(
 209             'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
 210
 211         while not self.is_logged_in:
 212             if next_subtask == 'LoginJsInstrumentationSubtask':
 213                 next_subtask = self._call_login_api(
 214                     'Submitting JS instrumentation response', headers, data=build_login_json({
 215                         'subtask_id': next_subtask,
 216                         'js_instrumentation': {
 217                             'response': '{}',
 218                             'link': 'next_link'
 219                         }
 220                     }))
 221
 222             elif next_subtask == 'LoginEnterUserIdentifierSSO':
 223                 next_subtask = self._call_login_api(
 224                     'Submitting username', headers, data=build_login_json({
 225                         'subtask_id': next_subtask,
 226                         'settings_list': {
 227                             'setting_responses': [{
 228                                 'key': 'user_identifier',
 229                                 'response_data': {
 230                                     'text_data': {
 231                                         'result': username
 232                                     }
 233                                 }
 234                             }],
 235                             'link': 'next_link'
 236                         }
 237                     }))
 238
 239             elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
 240                 next_subtask = self._call_login_api(
 241                     'Submitting alternate identifier', headers,
 242                     data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
 243                         'one of username, phone number or email that was not used as --username'))))
 244
 245             elif next_subtask == 'LoginEnterPassword':
 246                 next_subtask = self._call_login_api(
 247                     'Submitting password', headers, data=build_login_json({
 248                         'subtask_id': next_subtask,
 249                         'enter_password': {
 250                             'password': password,
 251                             'link': 'next_link'
 252                         }
 253                     }))
 254
 255             elif next_subtask == 'AccountDuplicationCheck':
 256                 next_subtask = self._call_login_api(
 257                     'Submitting account duplication check', headers, data=build_login_json({
 258                         'subtask_id': next_subtask,
 259                         'check_logged_in_account': {
 260                             'link': 'AccountDuplicationCheck_false'
 261                         }
 262                     }))
 263
 264             elif next_subtask == 'LoginTwoFactorAuthChallenge':
 265                 next_subtask = self._call_login_api(
 266                     'Submitting 2FA token', headers, data=build_login_json(input_dict(
 267                         next_subtask, self._get_tfa_info('two-factor authentication token'))))
 268
 269             elif next_subtask == 'LoginAcid':
 270                 next_subtask = self._call_login_api(
 271                     'Submitting confirmation code', headers, data=build_login_json(input_dict(
 272                         next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
 273
 274             elif next_subtask == 'LoginSuccessSubtask':
 275                 raise ExtractorError('Twitter API did not grant auth token cookie')
 276
 277             else:
 278                 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
 279
 280         self.report_login()
 281
 282     def _call_api(self, path, video_id, query={}, graphql=False):
 283         if not self.is_logged_in:
 284             self.raise_login_required()
 285
 286         result = self._download_json(
 287             (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, video_id,
 288             f'Downloading {"GraphQL" if graphql else "legacy API"} JSON', headers={
 289                 **self._set_base_headers(),
 290                 'x-twitter-auth-type': 'OAuth2Session',
 291                 'x-twitter-client-language': 'en',
 292                 'x-twitter-active-user': 'yes',
 293             }, query=query, expected_status={400, 401, 403, 404} if graphql else {403})
 294
 295         if result.get('errors'):
 296             errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 297             raise ExtractorError(
 298                 f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
 299
 300         return result
 301
 302     def _build_graphql_query(self, media_id):
 303         raise NotImplementedError('Method must be implemented to support GraphQL')
 304
 305     def _call_graphql_api(self, endpoint, media_id):
 306         data = self._build_graphql_query(media_id)
 307         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 308         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 309
 310
 311 class TwitterCardIE(InfoExtractor):
 312     IE_NAME = 'twitter:card'
 313     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
 314     _TESTS = [
 315         {
 316             'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
 317             # MD5 checksums are different in different places
 318             'info_dict': {
 319                 'id': '560070131976392705',
 320                 'ext': 'mp4',
 321                 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
 322                 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
 323                 'uploader': 'Twitter',
 324                 'uploader_id': 'Twitter',
 325                 'thumbnail': r're:^https?://.*\.jpg',
 326                 'duration': 30.033,
 327                 'timestamp': 1422366112,
 328                 'upload_date': '20150127',
 329                 'age_limit': 0,
 330                 'comment_count': int,
 331                 'tags': [],
 332                 'repost_count': int,
 333                 'like_count': int,
 334                 'display_id': '560070183650213889',
 335                 'uploader_url': 'https://twitter.com/Twitter',
 336             },
 337         },
 338         {
 339             'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
 340             'md5': '7137eca597f72b9abbe61e5ae0161399',
 341             'info_dict': {
 342                 'id': '623160978427936768',
 343                 'ext': 'mp4',
 344                 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
 345                 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
 346                 'uploader': 'NASA',
 347                 'uploader_id': 'NASA',
 348                 'timestamp': 1437408129,
 349                 'upload_date': '20150720',
 350                 'uploader_url': 'https://twitter.com/NASA',
 351                 'age_limit': 0,
 352                 'comment_count': int,
 353                 'like_count': int,
 354                 'repost_count': int,
 355                 'tags': ['PlutoFlyby'],
 356             },
 357             'params': {'format': '[protocol=https]'}
 358         },
 359         {
 360             'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
 361             'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
 362             'info_dict': {
 363                 'id': 'dq4Oj5quskI',
 364                 'ext': 'mp4',
 365                 'title': 'Ubuntu 11.10 Overview',
 366                 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
 367                 'upload_date': '20111013',
 368                 'uploader': 'OMG! UBUNTU!',
 369                 'uploader_id': 'omgubuntu',
 370                 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
 371                 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
 372                 'channel_follower_count': int,
 373                 'chapters': 'count:8',
 374                 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
 375                 'duration': 138,
 376                 'categories': ['Film & Animation'],
 377                 'age_limit': 0,
 378                 'comment_count': int,
 379                 'availability': 'public',
 380                 'like_count': int,
 381                 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
 382                 'view_count': int,
 383                 'tags': 'count:12',
 384                 'channel': 'OMG! UBUNTU!',
 385                 'playable_in_embed': True,
 386             },
 387             'add_ie': ['Youtube'],
 388         },
 389         {
 390             'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
 391             'info_dict': {
 392                 'id': 'iBb2x00UVlv',
 393                 'ext': 'mp4',
 394                 'upload_date': '20151113',
 395                 'uploader_id': '1189339351084113920',
 396                 'uploader': 'ArsenalTerje',
 397                 'title': 'Vine by ArsenalTerje',
 398                 'timestamp': 1447451307,
 399                 'alt_title': 'Vine by ArsenalTerje',
 400                 'comment_count': int,
 401                 'like_count': int,
 402                 'thumbnail': r're:^https?://[^?#]+\.jpg',
 403                 'view_count': int,
 404                 'repost_count': int,
 405             },
 406             'add_ie': ['Vine'],
 407             'params': {'skip_download': 'm3u8'},
 408         },
 409         {
 410             'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
 411             'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
 412             'info_dict': {
 413                 'id': '705235433198714880',
 414                 'ext': 'mp4',
 415                 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 416                 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 417                 'uploader': 'Brent Yarina',
 418                 'uploader_id': 'BTNBrentYarina',
 419                 'timestamp': 1456976204,
 420                 'upload_date': '20160303',
 421             },
 422             'skip': 'This content is no longer available.',
 423         },
 424         {
 425             'url': 'https://twitter.com/i/videos/752274308186120192',
 426             'only_matching': True,
 427         },
 428     ]
 429
 430     def _real_extract(self, url):
 431         status_id = self._match_id(url)
 432         return self.url_result(
 433             'https://twitter.com/statuses/' + status_id,
 434             TwitterIE.ie_key(), status_id)
 435
 436
 437 class TwitterIE(TwitterBaseIE):
 438     IE_NAME = 'twitter'
 439     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 440
 441     _TESTS = [{
 442         # comment_count, repost_count, view_count are only available with auth (applies to all tests)
 443         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 444         'info_dict': {
 445             'id': '643211870443208704',
 446             'display_id': '643211948184596480',
 447             'ext': 'mp4',
 448             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 449             'thumbnail': r're:^https?://.*\.jpg',
 450             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 451             'uploader': 'FREE THE NIPPLE',
 452             'uploader_id': 'freethenipple',
 453             'duration': 12.922,
 454             'timestamp': 1442188653,
 455             'upload_date': '20150913',
 456             'uploader_url': 'https://twitter.com/freethenipple',
 457             'like_count': int,
 458             'tags': [],
 459             'age_limit': 18,
 460         },
 461     }, {
 462         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 463         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 464         'info_dict': {
 465             'id': '657991469417025536',
 466             'ext': 'mp4',
 467             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 468             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 469             'thumbnail': r're:^https?://.*\.png',
 470             'uploader': 'Gifs',
 471             'uploader_id': 'giphz',
 472         },
 473         'expected_warnings': ['height', 'width'],
 474         'skip': 'Account suspended',
 475     }, {
 476         'url': 'https://twitter.com/starwars/status/665052190608723968',
 477         'info_dict': {
 478             'id': '665052190608723968',
 479             'display_id': '665052190608723968',
 480             'ext': 'mp4',
 481             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 482             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 483             'uploader_id': 'starwars',
 484             'uploader': r're:Star Wars.*',
 485             'timestamp': 1447395772,
 486             'upload_date': '20151113',
 487             'uploader_url': 'https://twitter.com/starwars',
 488             'like_count': int,
 489             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 490             'age_limit': 0,
 491         },
 492     }, {
 493         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 494         'info_dict': {
 495             'id': '705235433198714880',
 496             'ext': 'mp4',
 497             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 498             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 499             'uploader_id': 'BTNBrentYarina',
 500             'uploader': 'Brent Yarina',
 501             'timestamp': 1456976204,
 502             'upload_date': '20160303',
 503             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 504             'comment_count': int,
 505             'repost_count': int,
 506             'like_count': int,
 507             'tags': [],
 508             'age_limit': 0,
 509         },
 510         'params': {
 511             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 512             # Test case of TwitterCardIE
 513             'skip_download': True,
 514         },
 515         'skip': 'Dead external link',
 516     }, {
 517         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 518         'info_dict': {
 519             'id': '700207414000242688',
 520             'display_id': '700207533655363584',
 521             'ext': 'mp4',
 522             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 523             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 524             'thumbnail': r're:^https?://.*\.jpg',
 525             'uploader': 'jaydin donte geer',
 526             'uploader_id': 'jaydingeer',
 527             'duration': 30.0,
 528             'timestamp': 1455777459,
 529             'upload_date': '20160218',
 530             'uploader_url': 'https://twitter.com/jaydingeer',
 531             'like_count': int,
 532             'tags': ['Damndaniel'],
 533             'age_limit': 0,
 534         },
 535     }, {
 536         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 537         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 538         'info_dict': {
 539             'id': 'MIOxnrUteUd',
 540             'ext': 'mp4',
 541             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 542             'uploader': 'TAKUMA',
 543             'uploader_id': '1004126642786242560',
 544             'timestamp': 1402826626,
 545             'upload_date': '20140615',
 546             'thumbnail': r're:^https?://.*\.jpg',
 547             'alt_title': 'Vine by TAKUMA',
 548             'comment_count': int,
 549             'repost_count': int,
 550             'like_count': int,
 551             'view_count': int,
 552         },
 553         'add_ie': ['Vine'],
 554     }, {
 555         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 556         'info_dict': {
 557             'id': '717462543795523584',
 558             'display_id': '719944021058060289',
 559             'ext': 'mp4',
 560             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 561             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 562             'uploader_id': 'CaptainAmerica',
 563             'uploader': 'Captain America',
 564             'duration': 3.17,
 565             'timestamp': 1460483005,
 566             'upload_date': '20160412',
 567             'uploader_url': 'https://twitter.com/CaptainAmerica',
 568             'thumbnail': r're:^https?://.*\.jpg',
 569             'like_count': int,
 570             'tags': [],
 571             'age_limit': 0,
 572         },
 573     }, {
 574         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 575         'info_dict': {
 576             'id': '1zqKVVlkqLaKB',
 577             'ext': 'mp4',
 578             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 579             'upload_date': '20160923',
 580             'uploader_id': '1PmKqpJdOJQoY',
 581             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 582             'timestamp': 1474613214,
 583             'thumbnail': r're:^https?://.*\.jpg',
 584         },
 585         'add_ie': ['Periscope'],
 586     }, {
 587         # has mp4 formats via mobile API
 588         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 589         'info_dict': {
 590             'id': '852138619213144067',
 591             'ext': 'mp4',
 592             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 593             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 594             'uploader': 'عالم الأخبار',
 595             'uploader_id': 'news_al3alm',
 596             'duration': 277.4,
 597             'timestamp': 1492000653,
 598             'upload_date': '20170412',
 599         },
 600         'skip': 'Account suspended',
 601     }, {
 602         'url': 'https://twitter.com/i/web/status/910031516746514432',
 603         'info_dict': {
 604             'id': '910030238373089285',
 605             'display_id': '910031516746514432',
 606             'ext': 'mp4',
 607             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 608             'thumbnail': r're:^https?://.*\.jpg',
 609             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 610             'uploader': 'Préfet de Guadeloupe',
 611             'uploader_id': 'Prefet971',
 612             'duration': 47.48,
 613             'timestamp': 1505803395,
 614             'upload_date': '20170919',
 615             'uploader_url': 'https://twitter.com/Prefet971',
 616             'like_count': int,
 617             'tags': ['Maria'],
 618             'age_limit': 0,
 619         },
 620         'params': {
 621             'skip_download': True,  # requires ffmpeg
 622         },
 623     }, {
 624         # card via api.twitter.com/1.1/videos/tweet/config
 625         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 626         'info_dict': {
 627             'id': '1001551417340022785',
 628             'display_id': '1001551623938805763',
 629             'ext': 'mp4',
 630             'title': 're:.*?Shep is on a roll today.*?',
 631             'thumbnail': r're:^https?://.*\.jpg',
 632             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 633             'uploader': 'Lis Power',
 634             'uploader_id': 'LisPower1',
 635             'duration': 111.278,
 636             'timestamp': 1527623489,
 637             'upload_date': '20180529',
 638             'uploader_url': 'https://twitter.com/LisPower1',
 639             'like_count': int,
 640             'tags': [],
 641             'age_limit': 0,
 642         },
 643         'params': {
 644             'skip_download': True,  # requires ffmpeg
 645         },
 646     }, {
 647         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 648         'info_dict': {
 649             'id': '1087791272830607360',
 650             'display_id': '1087791357756956680',
 651             'ext': 'mp4',
 652             'title': 'Twitter - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 653             'thumbnail': r're:^https?://.*\.jpg',
 654             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 655             'uploader': 'Twitter',
 656             'uploader_id': 'Twitter',
 657             'duration': 61.567,
 658             'timestamp': 1548184644,
 659             'upload_date': '20190122',
 660             'uploader_url': 'https://twitter.com/Twitter',
 661             'like_count': int,
 662             'tags': [],
 663             'age_limit': 0,
 664         },
 665     }, {
 666         # not available in Periscope
 667         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 668         'info_dict': {
 669             'id': '1vOGwqejwoWxB',
 670             'ext': 'mp4',
 671             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 672             'uploader': 'Vivi',
 673             'uploader_id': '1eVjYOLGkGrQL',
 674             'thumbnail': r're:^https?://.*\.jpg',
 675             'tags': ['EduTECH2019'],
 676             'view_count': int,
 677         },
 678         'add_ie': ['TwitterBroadcast'],
 679         'skip': 'Requires authentication',
 680     }, {
 681         # unified card
 682         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 683         'info_dict': {
 684             'id': '1349774757969989634',
 685             'display_id': '1349794411333394432',
 686             'ext': 'mp4',
 687             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 688             'thumbnail': r're:^https?://.*\.jpg',
 689             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 690             'uploader': 'Brooklyn Nets',
 691             'uploader_id': 'BrooklynNets',
 692             'duration': 324.484,
 693             'timestamp': 1610651040,
 694             'upload_date': '20210114',
 695             'uploader_url': 'https://twitter.com/BrooklynNets',
 696             'like_count': int,
 697             'tags': [],
 698             'age_limit': 0,
 699         },
 700         'params': {
 701             'skip_download': True,
 702         },
 703     }, {
 704         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 705         'info_dict': {
 706             'id': '1577855447914409984',
 707             'display_id': '1577855540407197696',
 708             'ext': 'mp4',
 709             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 710             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 711             'upload_date': '20221006',
 712             'uploader': 'oshtru',
 713             'uploader_id': 'oshtru',
 714             'uploader_url': 'https://twitter.com/oshtru',
 715             'thumbnail': r're:^https?://.*\.jpg',
 716             'duration': 30.03,
 717             'timestamp': 1665025050,
 718             'like_count': int,
 719             'tags': [],
 720             'age_limit': 0,
 721         },
 722         'params': {'skip_download': True},
 723     }, {
 724         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 725         'info_dict': {
 726             'id': '1577719286659006464',
 727             'title': 'Ultima📛 | #вʟм - Test',
 728             'description': 'Test https://t.co/Y3KEZD7Dad',
 729             'uploader': 'Ultima📛 | #вʟм',
 730             'uploader_id': 'UltimaShadowX',
 731             'uploader_url': 'https://twitter.com/UltimaShadowX',
 732             'upload_date': '20221005',
 733             'timestamp': 1664992565,
 734             'like_count': int,
 735             'tags': [],
 736             'age_limit': 0,
 737         },
 738         'playlist_count': 4,
 739         'params': {'skip_download': True},
 740     }, {
 741         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 742         'info_dict': {
 743             'id': '1575559336759263233',
 744             'display_id': '1575560063510810624',
 745             'ext': 'mp4',
 746             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 747             'thumbnail': r're:^https?://.*\.jpg',
 748             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 749             'uploader': 'Max Olson',
 750             'uploader_id': 'MesoMax919',
 751             'uploader_url': 'https://twitter.com/MesoMax919',
 752             'duration': 21.321,
 753             'timestamp': 1664477766,
 754             'upload_date': '20220929',
 755             'like_count': int,
 756             'tags': ['HurricaneIan'],
 757             'age_limit': 0,
 758         },
 759     }, {
 760         # Adult content, fails if not logged in (GraphQL)
 761         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 762         'info_dict': {
 763             'id': '1575199163847000068',
 764             'display_id': '1575199173472927762',
 765             'ext': 'mp4',
 766             'title': str,
 767             'description': str,
 768             'uploader': str,
 769             'uploader_id': 'Rizdraws',
 770             'uploader_url': 'https://twitter.com/Rizdraws',
 771             'upload_date': '20220928',
 772             'timestamp': 1664391723,
 773             'thumbnail': r're:^https?://.+\.jpg',
 774             'like_count': int,
 775             'repost_count': int,
 776             'comment_count': int,
 777             'age_limit': 18,
 778             'tags': []
 779         },
 780         'skip': 'Requires authentication',
 781     }, {
 782         # Single Vimeo video result without auth
 783         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 784         'info_dict': {
 785             'id': '551578322',
 786             'ext': 'mp4',
 787             'title': 'Dusty & The Mayor',
 788             'uploader': 'Michael Chau',
 789             'uploader_id': 'user29061007',
 790             'uploader_url': 'https://vimeo.com/user29061007',
 791             'duration': 478,
 792             'thumbnail': 'https://i.vimeocdn.com/video/1139658575-0dfdce6e9a2401fe09feb24bf0d14e6f24a53c12f447ff688ace61009ad4c1ba-d_1280',
 793         },
 794     }, {
 795         # Playlist result only with auth
 796         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 797         'playlist_mincount': 2,
 798         'info_dict': {
 799             'id': '1395079556562706435',
 800             'title': str,
 801             'tags': [],
 802             'uploader': str,
 803             'like_count': int,
 804             'upload_date': '20210519',
 805             'age_limit': 0,
 806             'repost_count': int,
 807             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 808             'uploader_id': 'Srirachachau',
 809             'comment_count': int,
 810             'uploader_url': 'https://twitter.com/Srirachachau',
 811             'timestamp': 1621447860,
 812         },
 813         'skip': 'Requires authentication',
 814     }, {
 815         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 816         'playlist_mincount': 2,
 817         'info_dict': {
 818             'id': '1578353380363501568',
 819             'title': str,
 820             'uploader_id': 'DavidToons_',
 821             'repost_count': int,
 822             'like_count': int,
 823             'uploader': str,
 824             'timestamp': 1665143744,
 825             'uploader_url': 'https://twitter.com/DavidToons_',
 826             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 827             'tags': [],
 828             'comment_count': int,
 829             'upload_date': '20221007',
 830             'age_limit': 0,
 831         },
 832         'skip': 'Requires authentication',
 833     }, {
 834         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 835         'playlist_count': 2,
 836         'info_dict': {
 837             'id': '1578401165338976258',
 838             'title': str,
 839             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 840             'uploader': str,
 841             'uploader_id': 'primevideouk',
 842             'timestamp': 1665155137,
 843             'upload_date': '20221007',
 844             'age_limit': 0,
 845             'uploader_url': 'https://twitter.com/primevideouk',
 846             'like_count': int,
 847             'tags': ['TheRingsOfPower'],
 848         },
 849     }, {
 850         # Twitter Spaces
 851         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 852         'info_dict': {
 853             'id': '1lPJqmBeeNAJb',
 854             'ext': 'm4a',
 855             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 856             'uploader': r're:Monique Camarra.+?',
 857             'uploader_id': 'MoniqueCamarra',
 858             'live_status': 'was_live',
 859             'release_timestamp': 1658417414,
 860             'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
 861             'timestamp': 1658407771,
 862             'release_date': '20220721',
 863             'upload_date': '20220721',
 864         },
 865         'add_ie': ['TwitterSpaces'],
 866         'params': {'skip_download': 'm3u8'},
 867         'skip': 'Requires authentication',
 868     }, {
 869         # URL specifies video number but --yes-playlist
 870         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 871         'playlist_mincount': 2,
 872         'info_dict': {
 873             'id': '1600649710662213632',
 874             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 875             'timestamp': 1670459604.0,
 876             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 877             'uploader_id': 'CTVJLaidlaw',
 878             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 879             'upload_date': '20221208',
 880             'age_limit': 0,
 881             'uploader': 'Jocelyn Laidlaw',
 882             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 883             'like_count': int,
 884         },
 885     }, {
 886         # URL specifies video number and --no-playlist
 887         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 888         'info_dict': {
 889             'id': '1600649511827013632',
 890             'ext': 'mp4',
 891             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 892             'thumbnail': r're:^https?://.+\.jpg',
 893             'timestamp': 1670459604.0,
 894             'uploader_id': 'CTVJLaidlaw',
 895             'uploader': 'Jocelyn Laidlaw',
 896             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 897             'duration': 102.226,
 898             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 899             'display_id': '1600649710662213632',
 900             'like_count': int,
 901             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 902             'upload_date': '20221208',
 903             'age_limit': 0,
 904         },
 905         'params': {'noplaylist': True},
 906     }, {
        # The URL id points to a TweetWithVisibilityResults entity that wraps the actual Tweet;
        # note that the extracted id differs from the id in the URL
 909         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 910         'info_dict': {
 911             'id': '1621117577354424321',
 912             'display_id': '1621117700482416640',
 913             'ext': 'mp4',
 914             'title': '뽀 - 아 최우제 이동속도 봐',
 915             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 916             'duration': 24.598,
 917             'uploader': '뽀',
 918             'uploader_id': 's2FAKER',
 919             'uploader_url': 'https://twitter.com/s2FAKER',
 920             'upload_date': '20230202',
 921             'timestamp': 1675339553.0,
 922             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
 923             'age_limit': 18,
 924             'tags': [],
 925             'like_count': int,
 926         },
 927     }, {
 928         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
 929         'info_dict': {
 930             'id': '1599108643743473680',
 931             'display_id': '1599108751385972737',
 932             'ext': 'mp4',
 933             'title': '\u06ea - \U0001F48B',
 934             'uploader_url': 'https://twitter.com/hlo_again',
 935             'like_count': int,
 936             'uploader_id': 'hlo_again',
 937             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
 938             'duration': 9.531,
 939             'upload_date': '20221203',
 940             'age_limit': 0,
 941             'timestamp': 1670092210.0,
 942             'tags': [],
 943             'uploader': '\u06ea',
 944             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
 945         },
 946         'params': {'noplaylist': True},
 947     }, {
 948         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
 949         'info_dict': {
 950             'id': '1600009362759733248',
 951             'display_id': '1600009574919962625',
 952             'ext': 'mp4',
 953             'uploader_url': 'https://twitter.com/MunTheShinobi',
 954             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
 955             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
 956             'age_limit': 0,
 957             'uploader': 'Mün The Shinobi',
 958             'upload_date': '20221206',
 959             'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
 960             'like_count': int,
 961             'tags': [],
 962             'uploader_id': 'MunTheShinobi',
 963             'duration': 139.987,
 964             'timestamp': 1670306984.0,
 965         },
 966     }, {
 967         # url to retweet id
 968         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
 969         'info_dict': {
 970             'id': '1623274794488659969',
 971             'display_id': '1623739803874349067',
 972             'ext': 'mp4',
 973             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
 974             'description': 'md5:224d62f54b0cdef8e33d4c56c41ac503',
 975             'uploader': 'Johnny Bullets',
 976             'uploader_id': 'Johnnybull3ts',
 977             'uploader_url': 'https://twitter.com/Johnnybull3ts',
 978             'age_limit': 0,
 979             'tags': [],
 980             'duration': 8.033,
 981             'timestamp': 1675853859.0,
 982             'upload_date': '20230208',
 983             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
 984             'like_count': int,
 985         },
 986     }, {
 987         # onion route
 988         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
 989         'only_matching': True,
 990     }, {
 991         # Twitch Clip Embed
 992         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 993         'only_matching': True,
 994     }, {
 995         # promo_video_website card
 996         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
 997         'only_matching': True,
 998     }, {
 999         # promo_video_convo card
1000         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1001         'only_matching': True,
1002     }, {
1003         # appplayer card
1004         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1005         'only_matching': True,
1006     }, {
1007         # video_direct_message card
1008         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1009         'only_matching': True,
1010     }, {
1011         # poll2choice_video card
1012         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1013         'only_matching': True,
1014     }, {
1015         # poll3choice_video card
1016         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1017         'only_matching': True,
1018     }, {
1019         # poll4choice_video card
1020         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1021         'only_matching': True,
1022     }]
1023
1024     def _graphql_to_legacy(self, data, twid):
1025         result = traverse_obj(data, (
1026             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1027             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
1028             'tweet_results', 'result', ('tweet', None),
1029         ), expected_type=dict, default={}, get_all=False)
1030
1031         if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
1032             self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
1033
1034         if 'tombstone' in result:
1035             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
1036             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
1037
1038         status = result.get('legacy', {})
1039         status.update(traverse_obj(result, {
1040             'user': ('core', 'user_results', 'result', 'legacy'),
1041             'card': ('card', 'legacy'),
1042             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1043         }, expected_type=dict, default={}))
1044
1045         # extra transformation is needed since result does not match legacy format
1046         binding_values = {
1047             binding_value.get('key'): binding_value.get('value')
1048             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
1049         }
1050         if binding_values:
1051             status['card']['binding_values'] = binding_values
1052
1053         return status
1054
1055     def _build_graphql_query(self, media_id):
1056         return {
1057             'variables': {
1058                 'focalTweetId': media_id,
1059                 'includePromotedContent': True,
1060                 'with_rux_injections': False,
1061                 'withBirdwatchNotes': True,
1062                 'withCommunity': True,
1063                 'withDownvotePerspective': False,
1064                 'withQuickPromoteEligibilityTweetFields': True,
1065                 'withReactionsMetadata': False,
1066                 'withReactionsPerspective': False,
1067                 'withSuperFollowsTweetFields': True,
1068                 'withSuperFollowsUserFields': True,
1069                 'withV2Timeline': True,
1070                 'withVoice': True,
1071             },
1072             'features': {
1073                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1074                 'interactive_text_enabled': True,
1075                 'responsive_web_edit_tweet_api_enabled': True,
1076                 'responsive_web_enhance_cards_enabled': True,
1077                 'responsive_web_graphql_timeline_navigation_enabled': False,
1078                 'responsive_web_text_conversations_enabled': False,
1079                 'responsive_web_uc_gql_enabled': True,
1080                 'standardized_nudges_misinfo': True,
1081                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1082                 'tweetypie_unmention_optimization_enabled': True,
1083                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1084                 'verified_phone_label_enabled': False,
1085                 'vibe_api_enabled': True,
1086             },
1087         }
1088
    def _real_extract(self, url):
        """Extract the media attached to (or linked from) a single tweet.

        The tweet's status data is fetched from the syndication endpoint when
        not logged in and via the GraphQL ``TweetDetail`` query otherwise.
        Non-photo media found in the status (or its quoted status) and the
        tweet's card are turned into entries sharing the tweet-level metadata.

        Returns a single info dict when only one entry is found or when the
        media selected by the URL index is extracted on its own, a playlist
        when several entries are found, or a URL result for the tweet's first
        expanded link when no entry could be built.

        Raises ExtractorError (expected) when the URL-selected media is
        missing or is not a video; login is requested when the syndication
        endpoint answers 404.
        """
        twid, selected_index = self._match_valid_url(url).group('id', 'index')
        if not self.is_logged_in:
            try:
                status = self._download_json(
                    'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
                    headers={'User-Agent': 'Googlebot'}, query={'id': twid})
                self.to_screen(f'Some metadata is missing without authentication. {self._login_hint()}')
            except ExtractorError as e:
                # A 404 is reported as a login requirement; anything else is re-raised unchanged
                if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
                    self.raise_login_required('Requested tweet may only be available when logged in')
                raise
        else:
            status = self._graphql_to_legacy(
                self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)

        # The tweet text is read from 'full_text' or 'text' (first one found),
        # with newlines flattened to spaces; it serves as title and description
        title = description = traverse_obj(
            status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
        # Strip URLs from the title only, so that junk such as
        # 'https -_t.co_BJYgOjSeGA' does not end up in filenames
        title = re.sub(r'\s+(https?://[^ ]+)', '', title)
        user = status.get('user') or {}
        uploader = user.get('name')
        if uploader:
            title = f'{uploader} - {title}'
        uploader_id = user.get('screen_name')

        # Tweet-level metadata shared by every entry extracted below
        info = {
            'id': twid,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': unified_timestamp(status.get('created_at')),
            'uploader_id': uploader_id,
            'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
            'like_count': int_or_none(status.get('favorite_count')),
            'repost_count': int_or_none(status.get('retweet_count')),
            'comment_count': int_or_none(status.get('reply_count')),
            'age_limit': 18 if status.get('possibly_sensitive') else 0,
            'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
        }

        def extract_from_video_info(media):
            """Build the per-media fields (id, formats, subtitles, thumbnails, ...) from one media dict."""
            media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
            if not media_id:
                # workaround for non-authenticated responses: take the numeric id
                # from the first variant URL containing '_video/<digits>/'
                media_id = traverse_obj(media, (
                    'video_info', 'variants', ..., 'url',
                    {lambda x: re.search(r'_video/(\d+)/', x)[1]}), get_all=False)
            self.write_debug(f'Extracting from video info: {media_id}')

            formats = []
            subtitles = {}
            for variant in traverse_obj(media, ('video_info', 'variants', ...)):
                fmts, subs = self._extract_variant_formats(variant, twid)
                subtitles = self._merge_subtitles(subtitles, subs)
                formats.extend(fmts)

            thumbnails = []
            media_url = media.get('media_url_https') or media.get('media_url')
            if media_url:
                def add_thumbnail(name, size):
                    # The size variant is selected through the 'name' query parameter
                    thumbnails.append({
                        'id': name,
                        'url': update_url_query(media_url, {'name': name}),
                        'width': int_or_none(size.get('w') or size.get('width')),
                        'height': int_or_none(size.get('h') or size.get('height')),
                    })
                # One thumbnail per listed size, plus the original
                for name, size in media.get('sizes', {}).items():
                    add_thumbnail(name, size)
                add_thumbnail('orig', media.get('original_info') or {})

            return {
                # Fall back to the tweet id when the media carries no id of its own
                'id': media_id or twid,
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
                'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
                # The codec of http formats are unknown
                '_format_sort_fields': ('res', 'br', 'size', 'proto'),
            }

        def extract_from_card_info(card):
            """Yield the entries described by the tweet's card; yields nothing for an empty card."""
            if not card:
                return

            self.write_debug(f'Extracting from card info: {card.get("url")}')
            binding_values = card['binding_values']

            def get_binding_value(k):
                # Each binding stores its payload under '<type>_value' (type lowercased);
                # a missing or malformed binding yields None
                o = binding_values.get(k) or {}
                return try_get(o, lambda x: x[x['type'].lower() + '_value'])

            # Only the part of the card name after the last ':' is matched
            card_name = card['name'].split(':')[-1]
            if card_name == 'player':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('player_url'),
                }
            elif card_name == 'periscope_broadcast':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('url') or get_binding_value('player_url'),
                    'ie_key': PeriscopeIE.ie_key(),
                }
            elif card_name == 'broadcast':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('broadcast_url'),
                    'ie_key': TwitterBroadcastIE.ie_key(),
                }
            elif card_name == 'audiospace':
                yield {
                    '_type': 'url',
                    'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                    'ie_key': TwitterSpacesIE.ie_key(),
                }
            elif card_name == 'summary':
                yield {
                    '_type': 'url',
                    'url': get_binding_value('card_url'),
                }
            elif card_name == 'unified_card':
                # The unified card is a JSON string whose media entities are
                # handled like regular tweet media
                unified_card = self._parse_json(get_binding_value('unified_card'), twid)
                yield from map(extract_from_video_info, traverse_obj(
                    unified_card, ('media_entities', ...), expected_type=dict))
            # amplify, promo_video_website, promo_video_convo, appplayer,
            # video_direct_message, poll2choice_video, poll3choice_video,
            # poll4choice_video, ...
            else:
                is_amplify = card_name == 'amplify'
                vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
                content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
                formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

                thumbnails = []
                for suffix in ('_small', '', '_large', '_x_large', '_original'):
                    image = get_binding_value('player_image' + suffix) or {}
                    image_url = image.get('url')
                    # Skip missing images and placeholder images
                    if not image_url or '/player-placeholder' in image_url:
                        continue
                    thumbnails.append({
                        # The unsuffixed image is labelled 'medium'
                        'id': suffix[1:] if suffix else 'medium',
                        'url': image_url,
                        'width': int_or_none(image.get('width')),
                        'height': int_or_none(image.get('height')),
                    })

                yield {
                    'formats': formats,
                    'subtitles': subtitles,
                    'thumbnails': thumbnails,
                    'duration': int_or_none(get_binding_value(
                        'content_duration_seconds')),
                }

        # Every non-photo media dict listed under 'mediaDetails' or under
        # 'extended_entities' -> 'media' of the status or its quoted status
        videos = traverse_obj(status, (
            ('mediaDetails', ((None, 'quoted_status'), 'extended_entities', 'media')),
            lambda _, m: m['type'] != 'photo', {dict}))

        if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
            # Media entries come first, followed by whatever the card yields
            selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
        else:
            # The index from the URL is 1-based and counts all media, photos included
            desired_obj = traverse_obj(status, (
                ('mediaDetails', ((None, 'quoted_status'), 'extended_entities', 'media')),
                int(selected_index) - 1, {dict}), get_all=False)
            if not desired_obj:
                raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
            elif desired_obj.get('type') != 'video':
                raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

            # Restore original archive id and video index in title
            # (the index is the media's position among the non-photo media)
            for index, entry in enumerate(videos, 1):
                if entry.get('id') != desired_obj.get('id'):
                    continue
                if index == 1:
                    info['_old_archive_ids'] = [make_archive_id(self, twid)]
                if len(videos) != 1:
                    info['title'] += f' #{index}'
                break

            return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

        # Per-entry fields override the tweet-level ones; the tweet id becomes the display id
        entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
        if not entries:
            # No media of its own: defer to the first expanded URL in the tweet, if any
            expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
            if not expanded_url or expanded_url == url:
                self.raise_no_formats('No video could be found in this tweet', expected=True)
                return info

            return self.url_result(expanded_url, display_id=twid, **info)

        # The first entry additionally gets an archive id built from the tweet id
        entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

        if len(entries) == 1:
            return entries[0]

        # Number the titles so that playlist entries are distinguishable
        for index, entry in enumerate(entries, 1):
            entry['title'] += f' #{index}'

        return self.playlist_result(entries, **info)
1290
1291
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for amp.twimg.com/v/<uuid> pages, which expose their media
    # through 'twitter:*' meta tags in the page markup
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats, subtitles and the thumbnail advertised by the page's meta tags.

        Formats and subtitles come from the VMAP URL in the
        ``twitter:amplify:vmap`` meta tag. The ``twitter:player`` dimensions
        are attached to the first format, and the ``twitter:image`` ones to the
        thumbnail taken from ``twitter:image:src``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        # Keep the subtitles as well instead of discarding them
        formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, video_id)

        def _find_dimension(target):
            # Read the '<target>' width/height meta tags; either may be None
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list: indexing it would raise IndexError
        # here, before the missing formats could be reported properly downstream
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
1347
1348
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for twitter.com/i/broadcasts/<id> pages
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast and its stream status, then build the HLS formats.

        Raises a geo-restriction error when the playback URL points at the
        geoblocked stream path.
        """
        broadcast_id = self._match_id(url)

        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        # Prefer the non-redirecting playback URL; 'location' is the mandatory fallback
        m3u8_url = source.get('noRedirectPlaybackUrl')
        if not m3u8_url:
            m3u8_url = source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL (if any) is used as format id prefix
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = url_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1385
1386
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` URLs (audio-only formats)."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased ``state`` from the Space metadata to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` query payload for *space_id*."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Build the info dict for a Space.

        Formats are only looked up when the Space is live, or has ended with
        replay enabled, and exposes a ``media_key``; otherwise a "no formats"
        condition is reported with a message describing why.

        Raises ExtractorError when the API returns no data for the Space.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states yield live_status = None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(
                f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate keys, in this order
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', live=is_live, fatal=False,
                    headers={'Referer': 'https://twitter.com/'})

            # Every format is audio-only; non-live ones also get a container hint
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # Both timestamps are divided by 1000 (scale=1000)
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1489
1490
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<code>`` pseudo-URLs) to their target."""

    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" is matched
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'
    # Interstitial warning page whose ``unsafe_link`` parameter carries the real target
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever URL it ends up at.

        ``tco:<code>`` inputs are first expanded to ``https://t.co/<code>``.
        If the final URL is the unsafe-link warning page, the warning prefix
        is stripped and the remainder is used as the target.
        """
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            shortcode = eid
            url = self._BASE_URL + shortcode
        # NOTE(review): the curl User-Agent presumably makes t.co answer with
        # a plain HTTP redirect instead of an HTML page - confirm
        new_url = self._request_webpage(
            url, shortcode, headers={'User-Agent': 'curl'}).geturl()
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Strip only the leading prefix; the remainder is kept verbatim
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)