yt_dlp/extractor/twitter.py

   1 import json
   2 import random
   3 import re
   4
   5 from .common import InfoExtractor
   6 from .periscope import PeriscopeBaseIE, PeriscopeIE
   7 from ..compat import functools  # isort: split
   8 from ..compat import (
   9     compat_parse_qs,
  10     compat_urllib_parse_unquote,
  11     compat_urllib_parse_urlparse,
  12 )
  13 from ..networking.exceptions import HTTPError
  14 from ..utils import (
  15     ExtractorError,
  16     dict_get,
  17     filter_dict,
  18     float_or_none,
  19     format_field,
  20     int_or_none,
  21     make_archive_id,
  22     remove_end,
  23     str_or_none,
  24     strip_or_none,
  25     traverse_obj,
  26     try_call,
  27     try_get,
  28     unified_timestamp,
  29     update_url_query,
  30     url_or_none,
  31     xpath_text,
  32 )
  33
  34
class TwitterBaseIE(InfoExtractor):
    """Shared base class for the Twitter extractors in this module.

    Holds the API endpoints and bearer tokens, the state of the login flow and
    the helpers used to query the 1.1 ("legacy") and GraphQL APIs.
    """
    _NETRC_MACHINE = 'twitter'
    # Base URL of the 1.1 API: guest token activation, login flow and non-GraphQL calls
    _API_BASE = 'https://api.twitter.com/1.1/'
    # Base URL used by _call_api() when graphql=True
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # URL prefix (clearnet and .onion hosts) that the extractors prepend to their _VALID_URL
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer token sent in the Authorization header by default (see _set_base_headers)
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    # Bearer token used instead of _AUTH when the legacy API is requested without being logged in
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Token of the current login flow; overwritten by _call_login_api() after every response
    _flow_token = None
  43
    # Request body for the first call of the login flow (sent by _perform_login()
    # together with the query flow_name=login). Serialized once, at class creation,
    # to compact JSON and encoded to bytes.
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        # NOTE(review): presumably the subtask versions this client claims to support - confirm
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()
  97
  98     def _extract_variant_formats(self, variant, video_id):
  99         variant_url = variant.get('url')
 100         if not variant_url:
 101             return [], {}
 102         elif '.m3u8' in variant_url:
 103             return self._extract_m3u8_formats_and_subtitles(
 104                 variant_url, video_id, 'mp4', 'm3u8_native',
 105                 m3u8_id='hls', fatal=False)
 106         else:
 107             tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
 108             f = {
 109                 'url': variant_url,
 110                 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
 111                 'tbr': tbr,
 112             }
 113             self._search_dimensions_in_video_url(f, variant_url)
 114             return [f], {}
 115
 116     def _extract_formats_from_vmap_url(self, vmap_url, video_id):
 117         vmap_url = url_or_none(vmap_url)
 118         if not vmap_url:
 119             return [], {}
 120         vmap_data = self._download_xml(vmap_url, video_id)
 121         formats = []
 122         subtitles = {}
 123         urls = []
 124         for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
 125             video_variant.attrib['url'] = compat_urllib_parse_unquote(
 126                 video_variant.attrib['url'])
 127             urls.append(video_variant.attrib['url'])
 128             fmts, subs = self._extract_variant_formats(
 129                 video_variant.attrib, video_id)
 130             formats.extend(fmts)
 131             subtitles = self._merge_subtitles(subtitles, subs)
 132         video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
 133         if video_url not in urls:
 134             fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
 135             formats.extend(fmts)
 136             subtitles = self._merge_subtitles(subtitles, subs)
 137         return formats, subtitles
 138
 139     @staticmethod
 140     def _search_dimensions_in_video_url(a_format, video_url):
 141         m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
 142         if m:
 143             a_format.update({
 144                 'width': int(m.group('width')),
 145                 'height': int(m.group('height')),
 146             })
 147
 148     @property
 149     def is_logged_in(self):
 150         return bool(self._get_cookies(self._API_BASE).get('auth_token'))
 151
 152     @functools.cached_property
 153     def _selected_api(self):
 154         return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
 155
 156     def _fetch_guest_token(self, display_id):
 157         guest_token = traverse_obj(self._download_json(
 158             f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
 159             headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
 160             ('guest_token', {str}))
 161         if not guest_token:
 162             raise ExtractorError('Could not retrieve guest token')
 163         return guest_token
 164
 165     def _set_base_headers(self, legacy=False):
 166         bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
 167         return filter_dict({
 168             'Authorization': f'Bearer {bearer_token}',
 169             'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
 170         })
 171
 172     def _call_login_api(self, note, headers, query={}, data=None):
 173         response = self._download_json(
 174             f'{self._API_BASE}onboarding/task.json', None, note,
 175             headers=headers, query=query, data=data, expected_status=400)
 176         error = traverse_obj(response, ('errors', 0, 'message', {str}))
 177         if error:
 178             raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
 179         elif traverse_obj(response, 'status') != 'success':
 180             raise ExtractorError('Login was unsuccessful')
 181
 182         subtask = traverse_obj(
 183             response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
 184         if not subtask:
 185             raise ExtractorError('Twitter API did not return next login subtask')
 186
 187         self._flow_token = response['flow_token']
 188
 189         return subtask
 190
 191     def _perform_login(self, username, password):
 192         if self.is_logged_in:
 193             return
 194
 195         webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
 196         guest_token = self._search_regex(
 197             r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
 198         headers = {
 199             **self._set_base_headers(),
 200             'content-type': 'application/json',
 201             'x-guest-token': guest_token,
 202             'x-twitter-client-language': 'en',
 203             'x-twitter-active-user': 'yes',
 204             'Referer': 'https://twitter.com/',
 205             'Origin': 'https://twitter.com',
 206         }
 207
 208         def build_login_json(*subtask_inputs):
 209             return json.dumps({
 210                 'flow_token': self._flow_token,
 211                 'subtask_inputs': subtask_inputs
 212             }, separators=(',', ':')).encode()
 213
 214         def input_dict(subtask_id, text):
 215             return {
 216                 'subtask_id': subtask_id,
 217                 'enter_text': {
 218                     'text': text,
 219                     'link': 'next_link'
 220                 }
 221             }
 222
 223         next_subtask = self._call_login_api(
 224             'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
 225
 226         while not self.is_logged_in:
 227             if next_subtask == 'LoginJsInstrumentationSubtask':
 228                 next_subtask = self._call_login_api(
 229                     'Submitting JS instrumentation response', headers, data=build_login_json({
 230                         'subtask_id': next_subtask,
 231                         'js_instrumentation': {
 232                             'response': '{}',
 233                             'link': 'next_link'
 234                         }
 235                     }))
 236
 237             elif next_subtask == 'LoginEnterUserIdentifierSSO':
 238                 next_subtask = self._call_login_api(
 239                     'Submitting username', headers, data=build_login_json({
 240                         'subtask_id': next_subtask,
 241                         'settings_list': {
 242                             'setting_responses': [{
 243                                 'key': 'user_identifier',
 244                                 'response_data': {
 245                                     'text_data': {
 246                                         'result': username
 247                                     }
 248                                 }
 249                             }],
 250                             'link': 'next_link'
 251                         }
 252                     }))
 253
 254             elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
 255                 next_subtask = self._call_login_api(
 256                     'Submitting alternate identifier', headers,
 257                     data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
 258                         'one of username, phone number or email that was not used as --username'))))
 259
 260             elif next_subtask == 'LoginEnterPassword':
 261                 next_subtask = self._call_login_api(
 262                     'Submitting password', headers, data=build_login_json({
 263                         'subtask_id': next_subtask,
 264                         'enter_password': {
 265                             'password': password,
 266                             'link': 'next_link'
 267                         }
 268                     }))
 269
 270             elif next_subtask == 'AccountDuplicationCheck':
 271                 next_subtask = self._call_login_api(
 272                     'Submitting account duplication check', headers, data=build_login_json({
 273                         'subtask_id': next_subtask,
 274                         'check_logged_in_account': {
 275                             'link': 'AccountDuplicationCheck_false'
 276                         }
 277                     }))
 278
 279             elif next_subtask == 'LoginTwoFactorAuthChallenge':
 280                 next_subtask = self._call_login_api(
 281                     'Submitting 2FA token', headers, data=build_login_json(input_dict(
 282                         next_subtask, self._get_tfa_info('two-factor authentication token'))))
 283
 284             elif next_subtask == 'LoginAcid':
 285                 next_subtask = self._call_login_api(
 286                     'Submitting confirmation code', headers, data=build_login_json(input_dict(
 287                         next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
 288
 289             elif next_subtask == 'ArkoseLogin':
 290                 self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
 291
 292             elif next_subtask == 'DenyLoginSubtask':
 293                 self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
 294
 295             elif next_subtask == 'LoginSuccessSubtask':
 296                 raise ExtractorError('Twitter API did not grant auth token cookie')
 297
 298             else:
 299                 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
 300
 301         self.report_login()
 302
 303     def _call_api(self, path, video_id, query={}, graphql=False):
 304         headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
 305         headers.update({
 306             'x-twitter-auth-type': 'OAuth2Session',
 307             'x-twitter-client-language': 'en',
 308             'x-twitter-active-user': 'yes',
 309         } if self.is_logged_in else {
 310             'x-guest-token': self._fetch_guest_token(video_id)
 311         })
 312         allowed_status = {400, 401, 403, 404} if graphql else {403}
 313         result = self._download_json(
 314             (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
 315             video_id, headers=headers, query=query, expected_status=allowed_status,
 316             note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
 317
 318         if result.get('errors'):
 319             errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
 320             if errors and 'not authorized' in errors:
 321                 self.raise_login_required(remove_end(errors, '.'))
 322             raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')
 323
 324         return result
 325
 326     def _build_graphql_query(self, media_id):
 327         raise NotImplementedError('Method must be implemented to support GraphQL')
 328
 329     def _call_graphql_api(self, endpoint, media_id):
 330         data = self._build_graphql_query(media_id)
 331         query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
 332         return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
 333
 334
class TwitterCardIE(InfoExtractor):
    # Matches card / embedded-video URLs ("i/cards/tfw/v1/<id>", "i/videos/<id>",
    # "i/videos/tweet/<id>"); _real_extract hands the ID over to TwitterIE.
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            # Restricted to formats served over the https protocol
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            # Card that resolves to a YouTube video (handled by the Youtube extractor)
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            # Card that resolves to a Vine video (handled by the Vine extractor)
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            # "i/videos/tweet/<id>" URL form; skipped because the content is gone
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            # "i/videos/<id>" URL form; only the URL match is checked
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]
 453
 454     def _real_extract(self, url):
 455         status_id = self._match_id(url)
 456         return self.url_result(
 457             'https://twitter.com/statuses/' + status_id,
 458             TwitterIE.ie_key(), status_id)
 459
 460
 461 class TwitterIE(TwitterBaseIE):
 462     IE_NAME = 'twitter'
 463     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 464
 465     _TESTS = [{
 466         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
 467         'info_dict': {
 468             'id': '643211870443208704',
 469             'display_id': '643211948184596480',
 470             'ext': 'mp4',
 471             'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
 472             'thumbnail': r're:^https?://.*\.jpg',
 473             'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
 474             'uploader': 'FREE THE NIPPLE',
 475             'uploader_id': 'freethenipple',
 476             'duration': 12.922,
 477             'timestamp': 1442188653,
 478             'upload_date': '20150913',
 479             'uploader_url': 'https://twitter.com/freethenipple',
 480             'comment_count': int,
 481             'repost_count': int,
 482             'like_count': int,
 483             'tags': [],
 484             'age_limit': 18,
 485             '_old_archive_ids': ['twitter 643211948184596480'],
 486         },
 487     }, {
 488         'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
 489         'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
 490         'info_dict': {
 491             'id': '657991469417025536',
 492             'ext': 'mp4',
 493             'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
 494             'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
 495             'thumbnail': r're:^https?://.*\.png',
 496             'uploader': 'Gifs',
 497             'uploader_id': 'giphz',
 498         },
 499         'expected_warnings': ['height', 'width'],
 500         'skip': 'Account suspended',
 501     }, {
 502         'url': 'https://twitter.com/starwars/status/665052190608723968',
 503         'info_dict': {
 504             'id': '665052190608723968',
 505             'display_id': '665052190608723968',
 506             'ext': 'mp4',
 507             'title': r're:Star Wars.*A new beginning is coming December 18.*',
 508             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
 509             'uploader_id': 'starwars',
 510             'uploader': r're:Star Wars.*',
 511             'timestamp': 1447395772,
 512             'upload_date': '20151113',
 513             'uploader_url': 'https://twitter.com/starwars',
 514             'comment_count': int,
 515             'repost_count': int,
 516             'like_count': int,
 517             'tags': ['TV', 'StarWars', 'TheForceAwakens'],
 518             'age_limit': 0,
 519             '_old_archive_ids': ['twitter 665052190608723968'],
 520         },
 521     }, {
 522         'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
 523         'info_dict': {
 524             'id': '705235433198714880',
 525             'ext': 'mp4',
 526             'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
 527             'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
 528             'uploader_id': 'BTNBrentYarina',
 529             'uploader': 'Brent Yarina',
 530             'timestamp': 1456976204,
 531             'upload_date': '20160303',
 532             'uploader_url': 'https://twitter.com/BTNBrentYarina',
 533             'comment_count': int,
 534             'repost_count': int,
 535             'like_count': int,
 536             'tags': [],
 537             'age_limit': 0,
 538         },
 539         'params': {
 540             # The same video as https://twitter.com/i/videos/tweet/705235433198714880
 541             # Test case of TwitterCardIE
 542             'skip_download': True,
 543         },
 544         'skip': 'Dead external link',
 545     }, {
 546         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
 547         'info_dict': {
 548             'id': '700207414000242688',
 549             'display_id': '700207533655363584',
 550             'ext': 'mp4',
 551             'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
 552             'description': 'BEAT PROD: @suhmeduh  https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
 553             'thumbnail': r're:^https?://.*\.jpg',
 554             'uploader': 'jaydin donte geer',
 555             'uploader_id': 'jaydingeer',
 556             'duration': 30.0,
 557             'timestamp': 1455777459,
 558             'upload_date': '20160218',
 559             'uploader_url': 'https://twitter.com/jaydingeer',
 560             'comment_count': int,
 561             'repost_count': int,
 562             'like_count': int,
 563             'tags': ['Damndaniel'],
 564             'age_limit': 0,
 565             '_old_archive_ids': ['twitter 700207533655363584'],
 566         },
 567     }, {
 568         'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
 569         'md5': '89a15ed345d13b86e9a5a5e051fa308a',
 570         'info_dict': {
 571             'id': 'MIOxnrUteUd',
 572             'ext': 'mp4',
 573             'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
 574             'uploader': 'TAKUMA',
 575             'uploader_id': '1004126642786242560',
 576             'timestamp': 1402826626,
 577             'upload_date': '20140615',
 578             'thumbnail': r're:^https?://.*\.jpg',
 579             'alt_title': 'Vine by TAKUMA',
 580             'comment_count': int,
 581             'repost_count': int,
 582             'like_count': int,
 583             'view_count': int,
 584         },
 585         'add_ie': ['Vine'],
 586     }, {
 587         'url': 'https://twitter.com/captainamerica/status/719944021058060289',
 588         'info_dict': {
 589             'id': '717462543795523584',
 590             'display_id': '719944021058060289',
 591             'ext': 'mp4',
 592             'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
 593             'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
 594             'uploader_id': 'CaptainAmerica',
 595             'uploader': 'Captain America',
 596             'duration': 3.17,
 597             'timestamp': 1460483005,
 598             'upload_date': '20160412',
 599             'uploader_url': 'https://twitter.com/CaptainAmerica',
 600             'thumbnail': r're:^https?://.*\.jpg',
 601             'comment_count': int,
 602             'repost_count': int,
 603             'like_count': int,
 604             'tags': [],
 605             'age_limit': 0,
 606             '_old_archive_ids': ['twitter 719944021058060289'],
 607         },
 608     }, {
 609         'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
 610         'info_dict': {
 611             'id': '1zqKVVlkqLaKB',
 612             'ext': 'mp4',
 613             'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
 614             'upload_date': '20160923',
 615             'uploader_id': '1PmKqpJdOJQoY',
 616             'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
 617             'timestamp': 1474613214,
 618             'thumbnail': r're:^https?://.*\.jpg',
 619         },
 620         'add_ie': ['Periscope'],
 621         'skip': 'Broadcast not found',
 622     }, {
 623         # has mp4 formats via mobile API
 624         'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
 625         'info_dict': {
 626             'id': '852077943283097602',
 627             'ext': 'mp4',
 628             'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
 629             'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة   https://t.co/xg6OhpyKfN',
 630             'uploader': 'عالم الأخبار',
 631             'uploader_id': 'news_al3alm',
 632             'duration': 277.4,
 633             'timestamp': 1492000653,
 634             'upload_date': '20170412',
 635             'display_id': '852138619213144067',
 636             'age_limit': 0,
 637             'uploader_url': 'https://twitter.com/news_al3alm',
 638             'thumbnail': r're:^https?://.*\.jpg',
 639             'tags': [],
 640             'repost_count': int,
 641             'like_count': int,
 642             'comment_count': int,
 643             '_old_archive_ids': ['twitter 852138619213144067'],
 644         },
 645     }, {
 646         'url': 'https://twitter.com/i/web/status/910031516746514432',
 647         'info_dict': {
 648             'id': '910030238373089285',
 649             'display_id': '910031516746514432',
 650             'ext': 'mp4',
 651             'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
 652             'thumbnail': r're:^https?://.*\.jpg',
 653             'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
 654             'uploader': 'Préfet de Guadeloupe',
 655             'uploader_id': 'Prefet971',
 656             'duration': 47.48,
 657             'timestamp': 1505803395,
 658             'upload_date': '20170919',
 659             'uploader_url': 'https://twitter.com/Prefet971',
 660             'comment_count': int,
 661             'repost_count': int,
 662             'like_count': int,
 663             'tags': ['Maria'],
 664             'age_limit': 0,
 665             '_old_archive_ids': ['twitter 910031516746514432'],
 666         },
 667         'params': {
 668             'skip_download': True,  # requires ffmpeg
 669         },
 670     }, {
 671         # card via api.twitter.com/1.1/videos/tweet/config
 672         'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
 673         'info_dict': {
 674             'id': '1001551417340022785',
 675             'display_id': '1001551623938805763',
 676             'ext': 'mp4',
 677             'title': 're:.*?Shep is on a roll today.*?',
 678             'thumbnail': r're:^https?://.*\.jpg',
 679             'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
 680             'uploader': 'Lis Power',
 681             'uploader_id': 'LisPower1',
 682             'duration': 111.278,
 683             'timestamp': 1527623489,
 684             'upload_date': '20180529',
 685             'uploader_url': 'https://twitter.com/LisPower1',
 686             'comment_count': int,
 687             'repost_count': int,
 688             'like_count': int,
 689             'tags': [],
 690             'age_limit': 0,
 691             '_old_archive_ids': ['twitter 1001551623938805763'],
 692         },
 693         'params': {
 694             'skip_download': True,  # requires ffmpeg
 695         },
 696     }, {
 697         'url': 'https://twitter.com/foobar/status/1087791357756956680',
 698         'info_dict': {
 699             'id': '1087791272830607360',
 700             'display_id': '1087791357756956680',
 701             'ext': 'mp4',
 702             'title': 'X - A new is coming.  Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
 703             'thumbnail': r're:^https?://.*\.jpg',
 704             'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
 705             'uploader': 'X',
 706             'uploader_id': 'X',
 707             'duration': 61.567,
 708             'timestamp': 1548184644,
 709             'upload_date': '20190122',
 710             'uploader_url': 'https://twitter.com/X',
 711             'comment_count': int,
 712             'repost_count': int,
 713             'like_count': int,
 714             'view_count': int,
 715             'tags': [],
 716             'age_limit': 0,
 717         },
 718         'skip': 'This Tweet is unavailable',
 719     }, {
 720         # not available in Periscope
 721         'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
 722         'info_dict': {
 723             'id': '1vOGwqejwoWxB',
 724             'ext': 'mp4',
 725             'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
 726             'uploader': 'Vivi',
 727             'uploader_id': '1eVjYOLGkGrQL',
 728             'thumbnail': r're:^https?://.*\.jpg',
 729             'tags': ['EduTECH2019'],
 730             'view_count': int,
 731         },
 732         'add_ie': ['TwitterBroadcast'],
 733         'skip': 'Broadcast no longer exists',
 734     }, {
 735         # unified card
 736         'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
 737         'info_dict': {
 738             'id': '1349774757969989634',
 739             'display_id': '1349794411333394432',
 740             'ext': 'mp4',
 741             'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
 742             'thumbnail': r're:^https?://.*\.jpg',
 743             'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
 744             'uploader': 'Brooklyn Nets',
 745             'uploader_id': 'BrooklynNets',
 746             'duration': 324.484,
 747             'timestamp': 1610651040,
 748             'upload_date': '20210114',
 749             'uploader_url': 'https://twitter.com/BrooklynNets',
 750             'comment_count': int,
 751             'repost_count': int,
 752             'like_count': int,
 753             'tags': [],
 754             'age_limit': 0,
 755             '_old_archive_ids': ['twitter 1349794411333394432'],
 756         },
 757         'params': {
 758             'skip_download': True,
 759         },
 760     }, {
 761         'url': 'https://twitter.com/oshtru/status/1577855540407197696',
 762         'info_dict': {
 763             'id': '1577855447914409984',
 764             'display_id': '1577855540407197696',
 765             'ext': 'mp4',
 766             'title': 'md5:9d198efb93557b8f8d5b78c480407214',
 767             'description': 'md5:b9c3699335447391d11753ab21c70a74',
 768             'upload_date': '20221006',
 769             'uploader': 'oshtru',
 770             'uploader_id': 'oshtru',
 771             'uploader_url': 'https://twitter.com/oshtru',
 772             'thumbnail': r're:^https?://.*\.jpg',
 773             'duration': 30.03,
 774             'timestamp': 1665025050,
 775             'comment_count': int,
 776             'repost_count': int,
 777             'like_count': int,
 778             'tags': [],
 779             'age_limit': 0,
 780             '_old_archive_ids': ['twitter 1577855540407197696'],
 781         },
 782         'params': {'skip_download': True},
 783     }, {
 784         'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
 785         'info_dict': {
 786             'id': '1577719286659006464',
 787             'title': 'Ultima - Test',
 788             'description': 'Test https://t.co/Y3KEZD7Dad',
 789             'uploader': 'Ultima',
 790             'uploader_id': 'UltimaShadowX',
 791             'uploader_url': 'https://twitter.com/UltimaShadowX',
 792             'upload_date': '20221005',
 793             'timestamp': 1664992565,
 794             'comment_count': int,
 795             'repost_count': int,
 796             'like_count': int,
 797             'tags': [],
 798             'age_limit': 0,
 799         },
 800         'playlist_count': 4,
 801         'params': {'skip_download': True},
 802     }, {
 803         'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
 804         'info_dict': {
 805             'id': '1575559336759263233',
 806             'display_id': '1575560063510810624',
 807             'ext': 'mp4',
 808             'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
 809             'thumbnail': r're:^https?://.*\.jpg',
 810             'description': 'md5:95aea692fda36a12081b9629b02daa92',
 811             'uploader': 'Max Olson',
 812             'uploader_id': 'MesoMax919',
 813             'uploader_url': 'https://twitter.com/MesoMax919',
 814             'duration': 21.321,
 815             'timestamp': 1664477766,
 816             'upload_date': '20220929',
 817             'comment_count': int,
 818             'repost_count': int,
 819             'like_count': int,
 820             'tags': ['HurricaneIan'],
 821             'age_limit': 0,
 822             '_old_archive_ids': ['twitter 1575560063510810624'],
 823         },
 824     }, {
 825         # Adult content, fails if not logged in
 826         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
 827         'info_dict': {
 828             'id': '1575199163847000068',
 829             'display_id': '1575199173472927762',
 830             'ext': 'mp4',
 831             'title': str,
 832             'description': str,
 833             'uploader': str,
 834             'uploader_id': 'Rizdraws',
 835             'uploader_url': 'https://twitter.com/Rizdraws',
 836             'upload_date': '20220928',
 837             'timestamp': 1664391723,
 838             'thumbnail': r're:^https?://.+\.jpg',
 839             'like_count': int,
 840             'repost_count': int,
 841             'comment_count': int,
 842             'age_limit': 18,
 843             'tags': []
 844         },
 845         'params': {'skip_download': 'The media could not be played'},
 846         'skip': 'Requires authentication',
 847     }, {
 848         # Playlist result only with graphql API
 849         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
 850         'playlist_mincount': 2,
 851         'info_dict': {
 852             'id': '1395079556562706435',
 853             'title': str,
 854             'tags': [],
 855             'uploader': str,
 856             'like_count': int,
 857             'upload_date': '20210519',
 858             'age_limit': 0,
 859             'repost_count': int,
 860             'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
 861             'uploader_id': 'Srirachachau',
 862             'comment_count': int,
 863             'uploader_url': 'https://twitter.com/Srirachachau',
 864             'timestamp': 1621447860,
 865         },
 866     }, {
 867         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
 868         'playlist_mincount': 2,
 869         'info_dict': {
 870             'id': '1578353380363501568',
 871             'title': str,
 872             'uploader_id': 'DavidToons_',
 873             'repost_count': int,
 874             'like_count': int,
 875             'uploader': str,
 876             'timestamp': 1665143744,
 877             'uploader_url': 'https://twitter.com/DavidToons_',
 878             'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
 879             'tags': [],
 880             'comment_count': int,
 881             'upload_date': '20221007',
 882             'age_limit': 0,
 883         },
 884     }, {
 885         'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
 886         'playlist_count': 2,
 887         'info_dict': {
 888             'id': '1578401165338976258',
 889             'title': str,
 890             'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
 891             'uploader': str,
 892             'uploader_id': 'primevideouk',
 893             'timestamp': 1665155137,
 894             'upload_date': '20221007',
 895             'age_limit': 0,
 896             'uploader_url': 'https://twitter.com/primevideouk',
 897             'comment_count': int,
 898             'repost_count': int,
 899             'like_count': int,
 900             'tags': ['TheRingsOfPower'],
 901         },
 902     }, {
 903         # Twitter Spaces
 904         'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
 905         'info_dict': {
 906             'id': '1lPJqmBeeNAJb',
 907             'ext': 'm4a',
 908             'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
 909             'uploader': r're:Monique Camarra.+?',
 910             'uploader_id': 'MoniqueCamarra',
 911             'live_status': 'was_live',
 912             'release_timestamp': 1658417414,
 913             'description': 'md5:acce559345fd49f129c20dbcda3f1201',
 914             'timestamp': 1658407771,
 915             'release_date': '20220721',
 916             'upload_date': '20220721',
 917         },
 918         'add_ie': ['TwitterSpaces'],
 919         'params': {'skip_download': 'm3u8'},
 920         'skip': 'Requires authentication',
 921     }, {
 922         # URL specifies video number but --yes-playlist
 923         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
 924         'playlist_mincount': 2,
 925         'info_dict': {
 926             'id': '1600649710662213632',
 927             'title': 'md5:be05989b0722e114103ed3851a0ffae2',
 928             'timestamp': 1670459604.0,
 929             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 930             'comment_count': int,
 931             'uploader_id': 'CTVJLaidlaw',
 932             'repost_count': int,
 933             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 934             'upload_date': '20221208',
 935             'age_limit': 0,
 936             'uploader': 'Jocelyn Laidlaw',
 937             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 938             'like_count': int,
 939         },
 940     }, {
 941         # URL specifies video number and --no-playlist
 942         'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
 943         'info_dict': {
 944             'id': '1600649511827013632',
 945             'ext': 'mp4',
 946             'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
 947             'thumbnail': r're:^https?://.+\.jpg',
 948             'timestamp': 1670459604.0,
 949             'uploader_id': 'CTVJLaidlaw',
 950             'uploader': 'Jocelyn Laidlaw',
 951             'repost_count': int,
 952             'comment_count': int,
 953             'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
 954             'duration': 102.226,
 955             'uploader_url': 'https://twitter.com/CTVJLaidlaw',
 956             'display_id': '1600649710662213632',
 957             'like_count': int,
 958             'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
 959             'upload_date': '20221208',
 960             'age_limit': 0,
 961             '_old_archive_ids': ['twitter 1600649710662213632'],
 962         },
 963         'params': {'noplaylist': True},
 964     }, {
 965         # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
 966         # note the id different between extraction and url
 967         'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
 968         'info_dict': {
 969             'id': '1621117577354424321',
 970             'display_id': '1621117700482416640',
 971             'ext': 'mp4',
 972             'title': '뽀 - 아 최우제 이동속도 봐',
 973             'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
 974             'duration': 24.598,
 975             'uploader': '뽀',
 976             'uploader_id': 's2FAKER',
 977             'uploader_url': 'https://twitter.com/s2FAKER',
 978             'upload_date': '20230202',
 979             'timestamp': 1675339553.0,
 980             'thumbnail': r're:https?://pbs\.twimg\.com/.+',
 981             'age_limit': 18,
 982             'tags': [],
 983             'like_count': int,
 984             'repost_count': int,
 985             'comment_count': int,
 986             '_old_archive_ids': ['twitter 1621117700482416640'],
 987         },
 988     }, {
 989         'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
 990         'info_dict': {
 991             'id': '1599108643743473680',
 992             'display_id': '1599108751385972737',
 993             'ext': 'mp4',
 994             'title': '\u06ea - \U0001F48B',
 995             'uploader_url': 'https://twitter.com/hlo_again',
 996             'like_count': int,
 997             'uploader_id': 'hlo_again',
 998             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
 999             'repost_count': int,
1000             'duration': 9.531,
1001             'comment_count': int,
1002             'upload_date': '20221203',
1003             'age_limit': 0,
1004             'timestamp': 1670092210.0,
1005             'tags': [],
1006             'uploader': '\u06ea',
1007             'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1008             '_old_archive_ids': ['twitter 1599108751385972737'],
1009         },
1010         'params': {'noplaylist': True},
1011     }, {
1012         'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1013         'info_dict': {
1014             'id': '1600009362759733248',
1015             'display_id': '1600009574919962625',
1016             'ext': 'mp4',
1017             'uploader_url': 'https://twitter.com/MunTheShinobi',
1018             'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1019             'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1020             'age_limit': 0,
1021             'uploader': 'Mün',
1022             'repost_count': int,
1023             'upload_date': '20221206',
1024             'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1025             'comment_count': int,
1026             'like_count': int,
1027             'tags': [],
1028             'uploader_id': 'MunTheShinobi',
1029             'duration': 139.987,
1030             'timestamp': 1670306984.0,
1031             '_old_archive_ids': ['twitter 1600009574919962625'],
1032         },
1033     }, {
1034         # retweeted_status (private)
1035         'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1036         'info_dict': {
1037             'id': '1623274794488659969',
1038             'display_id': '1623739803874349067',
1039             'ext': 'mp4',
1040             'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
1041             'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1042             'uploader': 'Johnny Bullets',
1043             'uploader_id': 'Johnnybull3ts',
1044             'uploader_url': 'https://twitter.com/Johnnybull3ts',
1045             'age_limit': 0,
1046             'tags': [],
1047             'duration': 8.033,
1048             'timestamp': 1675853859.0,
1049             'upload_date': '20230208',
1050             'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1051             'like_count': int,
1052             'repost_count': int,
1053         },
1054         'skip': 'Protected tweet',
1055     }, {
1056         # retweeted_status
1057         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1058         'info_dict': {
1059             'id': '1694928337846538240',
1060             'ext': 'mp4',
1061             'display_id': '1695424220702888009',
1062             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1063             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1064             'uploader': 'Benny Johnson',
1065             'uploader_id': 'bennyjohnson',
1066             'uploader_url': 'https://twitter.com/bennyjohnson',
1067             'age_limit': 0,
1068             'tags': [],
1069             'duration': 45.001,
1070             'timestamp': 1692962814.0,
1071             'upload_date': '20230825',
1072             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1073             'like_count': int,
1074             'repost_count': int,
1075             'comment_count': int,
1076             '_old_archive_ids': ['twitter 1695424220702888009'],
1077         },
1078     }, {
1079         # retweeted_status w/ legacy API
1080         'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1081         'info_dict': {
1082             'id': '1694928337846538240',
1083             'ext': 'mp4',
1084             'display_id': '1695424220702888009',
1085             'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1086             'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1087             'uploader': 'Benny Johnson',
1088             'uploader_id': 'bennyjohnson',
1089             'uploader_url': 'https://twitter.com/bennyjohnson',
1090             'age_limit': 0,
1091             'tags': [],
1092             'duration': 45.001,
1093             'timestamp': 1692962814.0,
1094             'upload_date': '20230825',
1095             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1096             'like_count': int,
1097             'repost_count': int,
1098             '_old_archive_ids': ['twitter 1695424220702888009'],
1099         },
1100         'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1101     }, {
1102         # Broadcast embedded in tweet
1103         'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1104         'info_dict': {
1105             'id': '1rmxPMjLzAXKN',
1106             'ext': 'mp4',
1107             'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1108             'uploader': 'Jessica Dobson',
1109             'uploader_id': 'JessicaDobsonWX',
1110             'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1111             'timestamp': 1701566398,
1112             'upload_date': '20231203',
1113             'live_status': 'was_live',
1114             'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1115             'concurrent_view_count': int,
1116             'view_count': int,
1117         },
1118         'add_ie': ['TwitterBroadcast'],
1119     }, {
1120         # Animated gif and quote tweet video, with syndication API
1121         'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1122         'playlist_mincount': 2,
1123         'info_dict': {
1124             'id': '1696256659889565950',
1125             'title': 'BAKOON - https://t.co/zom968d0a0',
1126             'description': 'https://t.co/zom968d0a0',
1127             'tags': [],
1128             'uploader': 'BAKOON',
1129             'uploader_id': 'BAKKOOONN',
1130             'uploader_url': 'https://twitter.com/BAKKOOONN',
1131             'age_limit': 18,
1132             'timestamp': 1693254077.0,
1133             'upload_date': '20230828',
1134             'like_count': int,
1135         },
1136         'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
1137         'expected_warnings': ['Not all metadata'],
1138     }, {
1139         # "stale tweet" with typename "TweetWithVisibilityResults"
1140         'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1141         'md5': '62b1e11cdc2cdd0e527f83adb081f536',
1142         'info_dict': {
1143             'id': '1724883339285544960',
1144             'ext': 'mp4',
1145             'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1146             'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1147             'display_id': '1724884212803834154',
1148             'uploader': 'Robert F. Kennedy Jr',
1149             'uploader_id': 'RobertKennedyJr',
1150             'uploader_url': 'https://twitter.com/RobertKennedyJr',
1151             'upload_date': '20231115',
1152             'timestamp': 1700079417.0,
1153             'duration': 341.048,
1154             'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1155             'tags': ['Kennedy24'],
1156             'repost_count': int,
1157             'like_count': int,
1158             'comment_count': int,
1159             'age_limit': 0,
1160             '_old_archive_ids': ['twitter 1724884212803834154'],
1161         },
1162     }, {
1163         # onion route
1164         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1165         'only_matching': True,
1166     }, {
1167         # Twitch Clip Embed
1168         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1169         'only_matching': True,
1170     }, {
1171         # promo_video_website card
1172         'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1173         'only_matching': True,
1174     }, {
1175         # promo_video_convo card
1176         'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1177         'only_matching': True,
1178     }, {
1179         # appplayer card
1180         'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1181         'only_matching': True,
1182     }, {
1183         # video_direct_message card
1184         'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1185         'only_matching': True,
1186     }, {
1187         # poll2choice_video card
1188         'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1189         'only_matching': True,
1190     }, {
1191         # poll3choice_video card
1192         'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1193         'only_matching': True,
1194     }, {
1195         # poll4choice_video card
1196         'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1197         'only_matching': True,
1198     }]
1199
    # Captures the run of digits between "_video/" and the next "/" in a URL;
    # _call_syndication_api applies it to video variant URLs to derive `id_str`
    _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1201
1202     @property
1203     def _GRAPHQL_ENDPOINT(self):
1204         if self.is_logged_in:
1205             return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1206         return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1207
1208     def _graphql_to_legacy(self, data, twid):
1209         result = traverse_obj(data, (
1210             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
1211             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
1212             'tweet_results', 'result', ('tweet', None), {dict},
1213         ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
1214             data, ('tweetResult', 'result', {dict}), default={})
1215
1216         typename = result.get('__typename')
1217         if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
1218             self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)
1219
1220         if 'tombstone' in result:
1221             cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
1222             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
1223         elif typename == 'TweetUnavailable':
1224             reason = result.get('reason')
1225             if reason == 'NsfwLoggedOut':
1226                 self.raise_login_required('NSFW tweet requires authentication')
1227             elif reason == 'Protected':
1228                 self.raise_login_required('You are not authorized to view this protected tweet')
1229             raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
1230         # Result for "stale tweet" needs additional transformation
1231         elif typename == 'TweetWithVisibilityResults':
1232             result = traverse_obj(result, ('tweet', {dict})) or {}
1233
1234         status = result.get('legacy', {})
1235         status.update(traverse_obj(result, {
1236             'user': ('core', 'user_results', 'result', 'legacy'),
1237             'card': ('card', 'legacy'),
1238             'quoted_status': ('quoted_status_result', 'result', 'legacy'),
1239             'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
1240         }, expected_type=dict, default={}))
1241
1242         # extra transformations needed since result does not match legacy format
1243         if status.get('retweeted_status'):
1244             status['retweeted_status']['user'] = traverse_obj(status, (
1245                 'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
1246
1247         binding_values = {
1248             binding_value.get('key'): binding_value.get('value')
1249             for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
1250         }
1251         if binding_values:
1252             status['card']['binding_values'] = binding_values
1253
1254         return status
1255
1256     def _build_graphql_query(self, media_id):
1257         return {
1258             'variables': {
1259                 'focalTweetId': media_id,
1260                 'includePromotedContent': True,
1261                 'with_rux_injections': False,
1262                 'withBirdwatchNotes': True,
1263                 'withCommunity': True,
1264                 'withDownvotePerspective': False,
1265                 'withQuickPromoteEligibilityTweetFields': True,
1266                 'withReactionsMetadata': False,
1267                 'withReactionsPerspective': False,
1268                 'withSuperFollowsTweetFields': True,
1269                 'withSuperFollowsUserFields': True,
1270                 'withV2Timeline': True,
1271                 'withVoice': True,
1272             },
1273             'features': {
1274                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1275                 'interactive_text_enabled': True,
1276                 'responsive_web_edit_tweet_api_enabled': True,
1277                 'responsive_web_enhance_cards_enabled': True,
1278                 'responsive_web_graphql_timeline_navigation_enabled': False,
1279                 'responsive_web_text_conversations_enabled': False,
1280                 'responsive_web_uc_gql_enabled': True,
1281                 'standardized_nudges_misinfo': True,
1282                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1283                 'tweetypie_unmention_optimization_enabled': True,
1284                 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1285                 'verified_phone_label_enabled': False,
1286                 'vibe_api_enabled': True,
1287             },
1288         } if self.is_logged_in else {
1289             'variables': {
1290                 'tweetId': media_id,
1291                 'withCommunity': False,
1292                 'includePromotedContent': False,
1293                 'withVoice': False,
1294             },
1295             'features': {
1296                 'creator_subscriptions_tweet_preview_api_enabled': True,
1297                 'tweetypie_unmention_optimization_enabled': True,
1298                 'responsive_web_edit_tweet_api_enabled': True,
1299                 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1300                 'view_counts_everywhere_api_enabled': True,
1301                 'longform_notetweets_consumption_enabled': True,
1302                 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1303                 'tweet_awards_web_tipping_enabled': False,
1304                 'freedom_of_speech_not_reach_fetch_enabled': True,
1305                 'standardized_nudges_misinfo': True,
1306                 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1307                 'longform_notetweets_rich_text_read_enabled': True,
1308                 'longform_notetweets_inline_media_enabled': True,
1309                 'responsive_web_graphql_exclude_directive_enabled': True,
1310                 'verified_phone_label_enabled': False,
1311                 'responsive_web_media_download_video_enabled': False,
1312                 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1313                 'responsive_web_graphql_timeline_navigation_enabled': True,
1314                 'responsive_web_enhance_cards_enabled': False
1315             },
1316             'fieldToggles': {
1317                 'withArticleRichContentState': False
1318             }
1319         }
1320
1321     def _call_syndication_api(self, twid):
1322         self.report_warning(
1323             'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
1324         status = self._download_json(
1325             'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
1326             headers={'User-Agent': 'Googlebot'}, query={
1327                 'id': twid,
1328                 # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
1329                 'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
1330             })
1331         if not status:
1332             raise ExtractorError('Syndication endpoint returned empty JSON response')
1333         # Transform the result so its structure matches that of legacy/graphql
1334         media = []
1335         for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
1336             detail['id_str'] = traverse_obj(detail, (
1337                 'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
1338             media.append(detail)
1339         status['extended_entities'] = {'media': media}
1340
1341         return status
1342
1343     def _extract_status(self, twid):
1344         if self._selected_api not in ('graphql', 'legacy', 'syndication'):
1345             raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)
1346
1347         try:
1348             if self.is_logged_in or self._selected_api == 'graphql':
1349                 status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
1350             elif self._selected_api == 'legacy':
1351                 status = self._call_api(f'statuses/show/{twid}.json', twid, {
1352                     'cards_platform': 'Web-12',
1353                     'include_cards': 1,
1354                     'include_reply_count': 1,
1355                     'include_user_entities': 0,
1356                     'tweet_mode': 'extended',
1357                 })
1358         except ExtractorError as e:
1359             if not isinstance(e.cause, HTTPError) or not e.cause.status == 429:
1360                 raise
1361             self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
1362             status = self._call_syndication_api(twid)
1363
1364         if self._selected_api == 'syndication':
1365             status = self._call_syndication_api(twid)
1366
1367         return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1368
1369     def _real_extract(self, url):
1370         twid, selected_index = self._match_valid_url(url).group('id', 'index')
1371         status = self._extract_status(twid)
1372
1373         title = description = traverse_obj(
1374             status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
1375         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
1376         title = re.sub(r'\s+(https?://[^ ]+)', '', title)
1377         user = status.get('user') or {}
1378         uploader = user.get('name')
1379         if uploader:
1380             title = f'{uploader} - {title}'
1381         uploader_id = user.get('screen_name')
1382
1383         info = {
1384             'id': twid,
1385             'title': title,
1386             'description': description,
1387             'uploader': uploader,
1388             'timestamp': unified_timestamp(status.get('created_at')),
1389             'uploader_id': uploader_id,
1390             'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
1391             'like_count': int_or_none(status.get('favorite_count')),
1392             'repost_count': int_or_none(status.get('retweet_count')),
1393             'comment_count': int_or_none(status.get('reply_count')),
1394             'age_limit': 18 if status.get('possibly_sensitive') else 0,
1395             'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
1396         }
1397
1398         def extract_from_video_info(media):
1399             media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
1400             self.write_debug(f'Extracting from video info: {media_id}')
1401
1402             formats = []
1403             subtitles = {}
1404             for variant in traverse_obj(media, ('video_info', 'variants', ...)):
1405                 fmts, subs = self._extract_variant_formats(variant, twid)
1406                 subtitles = self._merge_subtitles(subtitles, subs)
1407                 formats.extend(fmts)
1408
1409             thumbnails = []
1410             media_url = media.get('media_url_https') or media.get('media_url')
1411             if media_url:
1412                 def add_thumbnail(name, size):
1413                     thumbnails.append({
1414                         'id': name,
1415                         'url': update_url_query(media_url, {'name': name}),
1416                         'width': int_or_none(size.get('w') or size.get('width')),
1417                         'height': int_or_none(size.get('h') or size.get('height')),
1418                     })
1419                 for name, size in media.get('sizes', {}).items():
1420                     add_thumbnail(name, size)
1421                 add_thumbnail('orig', media.get('original_info') or {})
1422
1423             return {
1424                 'id': media_id,
1425                 'formats': formats,
1426                 'subtitles': subtitles,
1427                 'thumbnails': thumbnails,
1428                 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
1429                 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
1430                 # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
1431                 '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
1432             }
1433
1434         def extract_from_card_info(card):
1435             if not card:
1436                 return
1437
1438             self.write_debug(f'Extracting from card info: {card.get("url")}')
1439             binding_values = card['binding_values']
1440
1441             def get_binding_value(k):
1442                 o = binding_values.get(k) or {}
1443                 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1444
1445             card_name = card['name'].split(':')[-1]
1446             if card_name == 'player':
1447                 yield {
1448                     '_type': 'url',
1449                     'url': get_binding_value('player_url'),
1450                 }
1451             elif card_name == 'periscope_broadcast':
1452                 yield {
1453                     '_type': 'url',
1454                     'url': get_binding_value('url') or get_binding_value('player_url'),
1455                     'ie_key': PeriscopeIE.ie_key(),
1456                 }
1457             elif card_name == 'broadcast':
1458                 yield {
1459                     '_type': 'url',
1460                     'url': get_binding_value('broadcast_url'),
1461                     'ie_key': TwitterBroadcastIE.ie_key(),
1462                 }
1463             elif card_name == 'audiospace':
1464                 yield {
1465                     '_type': 'url',
1466                     'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1467                     'ie_key': TwitterSpacesIE.ie_key(),
1468                 }
1469             elif card_name == 'summary':
1470                 yield {
1471                     '_type': 'url',
1472                     'url': get_binding_value('card_url'),
1473                 }
1474             elif card_name == 'unified_card':
1475                 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1476                 yield from map(extract_from_video_info, traverse_obj(
1477                     unified_card, ('media_entities', ...), expected_type=dict))
1478             # amplify, promo_video_website, promo_video_convo, appplayer,
1479             # video_direct_message, poll2choice_video, poll3choice_video,
1480             # poll4choice_video, ...
1481             else:
1482                 is_amplify = card_name == 'amplify'
1483                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1484                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1485                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
1486
1487                 thumbnails = []
1488                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1489                     image = get_binding_value('player_image' + suffix) or {}
1490                     image_url = image.get('url')
1491                     if not image_url or '/player-placeholder' in image_url:
1492                         continue
1493                     thumbnails.append({
1494                         'id': suffix[1:] if suffix else 'medium',
1495                         'url': image_url,
1496                         'width': int_or_none(image.get('width')),
1497                         'height': int_or_none(image.get('height')),
1498                     })
1499
1500                 yield {
1501                     'formats': formats,
1502                     'subtitles': subtitles,
1503                     'thumbnails': thumbnails,
1504                     'duration': int_or_none(get_binding_value(
1505                         'content_duration_seconds')),
1506                 }
1507
1508         videos = traverse_obj(status, (
1509             (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
1510
1511         if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1512             selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1513         else:
1514             desired_obj = traverse_obj(status, (
1515                 (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
1516             if not desired_obj:
1517                 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1518             elif desired_obj.get('type') != 'video':
1519                 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1520
1521             # Restore original archive id and video index in title
1522             for index, entry in enumerate(videos, 1):
1523                 if entry.get('id') != desired_obj.get('id'):
1524                     continue
1525                 if index == 1:
1526                     info['_old_archive_ids'] = [make_archive_id(self, twid)]
1527                 if len(videos) != 1:
1528                     info['title'] += f' #{index}'
1529                 break
1530
1531             return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1532
1533         entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
1534         if not entries:
1535             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1536             if not expanded_url or expanded_url == url:
1537                 self.raise_no_formats('No video could be found in this tweet', expected=True)
1538                 return info
1539
1540             return self.url_result(expanded_url, display_id=twid, **info)
1541
1542         entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1543
1544         if len(entries) == 1:
1545             return entries[0]
1546
1547         for index, entry in enumerate(entries, 1):
1548             entry['title'] += f' #{index}'
1549
1550         return self.playlist_result(entries, **info)
1551
1552
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Twitter Amplify video pages hosted on amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats from the page's VMAP URL and thumbnails from its ``twitter:*`` meta tags.

        Returns an info dict with ``id``, ``title``, ``formats`` and ``thumbnails``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        def _find_dimension(target):
            # Read the (width, height) pair advertised for 'twitter:<target>'
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # The format list may be empty (e.g. no usable VMAP URL); guard the lookup
        # instead of crashing with an IndexError
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1608
1609
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for broadcasts published under twitter.com/i/broadcasts/."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict of a broadcast; formats are omitted while it is still upcoming.

        Raises ExtractorError when the API has no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api('broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the Twitter API take precedence over the parsed broadcast data
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's 'type' query parameter (if any) becomes the m3u8 format id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1685
1686
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Twitter Spaces (audio rooms) under twitter.com/i/spaces/."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased API 'state' of a Space to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL variables and feature flags used to look up *space_id*."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Return the info dict (audio-only formats plus metadata) of a Twitter Space.

        Requires a logged-in session. Raises ExtractorError when the Space
        cannot be found.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # None when the state is missing or not one of the known ones
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'
        replay_available = metadata.get('is_space_available_for_replay')
        media_key = metadata.get('media_key')

        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or replay_available):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif media_key:
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1825
1826
class TwitterShortenerIE(TwitterBaseIE):
    """Resolves t.co short links (and ``tco:<id>`` pseudo-URLs) to the URL they redirect to."""
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'
    # Interstitial page that wraps links flagged as unsafe; the real target follows the prefix
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Follow the short link and return a URL result for its final destination."""
        eid, short_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            # 'tco:<id>' form: rebuild the real t.co URL
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Strip only the leading prefix, so that a target URL which itself contains
            # the prefix text is left intact
            # NOTE(review): the remainder is used verbatim - confirm whether it is percent-encoded
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)