jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/twitter.py
[extractor/twitter] Default to GraphQL, handle auth errors (#6957)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
index 48c14ddce1d176117bf6039e655f605594ea03c2..d9a89c44b64a0cd1e6fb1cfe9c0d5e257f634b73 100644 (file)
@@ -1,6 +1,5 @@
 import json
 import re
-import urllib.error
 
 from .common import InfoExtractor
 from .periscope import PeriscopeBaseIE, PeriscopeIE
@@ -17,6 +16,7 @@
     format_field,
     int_or_none,
     make_archive_id,
+    remove_end,
     str_or_none,
     strip_or_none,
     traverse_obj,
 class TwitterBaseIE(InfoExtractor):
     _API_BASE = 'https://api.twitter.com/1.1/'
     _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
-    _TOKENS = {
-        'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
-        'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
-    }
     _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
+    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
+    _guest_token = None
 
     def _extract_variant_formats(self, variant, video_id):
         variant_url = variant.get('url')
@@ -94,7 +92,7 @@ def is_logged_in(self):
 
     def _call_api(self, path, video_id, query={}, graphql=False):
         cookies = self._get_cookies(self._API_BASE)
-        headers = {}
+        headers = self._AUTH.copy()
 
         csrf_cookie = cookies.get('ct0')
         if csrf_cookie:
@@ -107,46 +105,34 @@ def _call_api(self, path, video_id, query={}, graphql=False):
                 'x-twitter-active-user': 'yes',
             })
 
-        result, last_error = None, None
-        for bearer_token in self._TOKENS:
-            headers['Authorization'] = f'Bearer {bearer_token}'
-
-            if not self.is_logged_in:
-                if not self._TOKENS[bearer_token]:
-                    headers.pop('x-guest-token', None)
-                    guest_token_response = self._download_json(
-                        self._API_BASE + 'guest/activate.json', video_id,
-                        'Downloading guest token', data=b'', headers=headers)
-
-                    self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
-                    if not self._TOKENS[bearer_token]:
-                        raise ExtractorError('Could not retrieve guest token')
-                headers['x-guest-token'] = self._TOKENS[bearer_token]
-
-            try:
-                allowed_status = {400, 403, 404} if graphql else {403}
-                result = self._download_json(
-                    (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
-                    video_id, headers=headers, query=query, expected_status=allowed_status)
-                break
-
-            except ExtractorError as e:
-                if last_error:
-                    raise last_error
-                elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
-                    raise
-                last_error = e
-                self.report_warning(
-                    'Twitter API gave 404 response, retrying with deprecated token. '
-                    'Only one media item can be extracted')
-
-        if result.get('errors'):
-            error_message = ', '.join(set(traverse_obj(
-                result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error'
-            raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True)
-
-        assert result is not None
-        return result
+        for first_attempt in (True, False):
+            if not self.is_logged_in and not self._guest_token:
+                headers.pop('x-guest-token', None)
+                self._guest_token = traverse_obj(self._download_json(
+                    f'{self._API_BASE}guest/activate.json', video_id,
+                    'Downloading guest token', data=b'', headers=headers), 'guest_token')
+            if self._guest_token:
+                headers['x-guest-token'] = self._guest_token
+            elif not self.is_logged_in:
+                raise ExtractorError('Could not retrieve guest token')
+
+            allowed_status = {400, 401, 403, 404} if graphql else {403}
+            result = self._download_json(
+                (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
+                video_id, headers=headers, query=query, expected_status=allowed_status,
+                note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
+
+            if result.get('errors'):
+                errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
+                if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
+                    self.to_screen('Guest token has expired. Refreshing guest token')
+                    self._guest_token = None
+                    continue
+
+                raise ExtractorError(
+                    f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)
+
+            return result
 
     def _build_graphql_query(self, media_id):
         raise NotImplementedError('Method must be implemented to support GraphQL')
@@ -285,7 +271,7 @@ def _real_extract(self, url):
 
 class TwitterIE(TwitterBaseIE):
     IE_NAME = 'twitter'
-    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
+    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
 
     _TESTS = [{
         'url': 'https://twitter.com/freethenipple/status/643211948184596480',
@@ -305,6 +291,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 18,
         },
@@ -328,7 +315,7 @@ class TwitterIE(TwitterBaseIE):
             'id': '665052190608723968',
             'display_id': '665052190608723968',
             'ext': 'mp4',
-            'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8',
+            'title': r're:Star Wars.*A new beginning is coming December 18.*',
             'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
             'uploader_id': 'starwars',
             'uploader': r're:Star Wars.*',
@@ -364,6 +351,7 @@ class TwitterIE(TwitterBaseIE):
             # Test case of TwitterCardIE
             'skip_download': True,
         },
+        'skip': 'Dead external link',
     }, {
         'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
         'info_dict': {
@@ -382,6 +370,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': ['Damndaniel'],
             'age_limit': 0,
         },
@@ -422,6 +411,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -471,6 +461,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': ['Maria'],
             'age_limit': 0,
         },
@@ -496,6 +487,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -520,6 +512,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -568,10 +561,10 @@ class TwitterIE(TwitterBaseIE):
             'id': '1577855447914409984',
             'display_id': '1577855540407197696',
             'ext': 'mp4',
-            'title': 'oshtru \U0001faac\U0001f47d - gm \u2728\ufe0f now I can post image and video. nice update.',
-            'description': 'gm \u2728\ufe0f now I can post image and video. nice update. https://t.co/cG7XgiINOm',
+            'title': 'md5:9d198efb93557b8f8d5b78c480407214',
+            'description': 'md5:b9c3699335447391d11753ab21c70a74',
             'upload_date': '20221006',
-            'uploader': 'oshtru \U0001faac\U0001f47d',
+            'uploader': 'oshtru',
             'uploader_id': 'oshtru',
             'uploader_url': 'https://twitter.com/oshtru',
             'thumbnail': r're:^https?://.*\.jpg',
@@ -580,6 +573,7 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': [],
             'age_limit': 0,
         },
@@ -621,12 +615,12 @@ class TwitterIE(TwitterBaseIE):
             'comment_count': int,
             'repost_count': int,
             'like_count': int,
+            'view_count': int,
             'tags': ['HurricaneIan'],
             'age_limit': 0,
         },
     }, {
-        # Adult content, uses old token
-        # Fails if not logged in (GraphQL)
+        # Adult content, fails if not logged in (GraphQL)
         'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
         'info_dict': {
             'id': '1575199163847000068',
@@ -639,16 +633,15 @@ class TwitterIE(TwitterBaseIE):
             'uploader_url': 'https://twitter.com/Rizdraws',
             'upload_date': '20220928',
             'timestamp': 1664391723,
-            'thumbnail': 're:^https?://.*\\.jpg',
+            'thumbnail': r're:^https?://.+\.jpg',
             'like_count': int,
             'repost_count': int,
             'comment_count': int,
             'age_limit': 18,
             'tags': []
         },
-        'expected_warnings': ['404'],
+        'skip': 'Requires authentication',
     }, {
-        # Description is missing one https://t.co url (GraphQL)
         'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
         'playlist_mincount': 2,
         'info_dict': {
@@ -660,14 +653,13 @@ class TwitterIE(TwitterBaseIE):
             'upload_date': '20210519',
             'age_limit': 0,
             'repost_count': int,
-            'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
+            'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
             'uploader_id': 'Srirachachau',
             'comment_count': int,
             'uploader_url': 'https://twitter.com/Srirachachau',
             'timestamp': 1621447860,
         },
     }, {
-        # Description is missing one https://t.co url (GraphQL)
         'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
         'playlist_mincount': 2,
         'info_dict': {
@@ -679,7 +671,7 @@ class TwitterIE(TwitterBaseIE):
             'uploader': str,
             'timestamp': 1665143744,
             'uploader_url': 'https://twitter.com/DavidToons_',
-            'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
+            'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
             'tags': [],
             'comment_count': int,
             'upload_date': '20221007',
@@ -718,6 +710,141 @@ class TwitterIE(TwitterBaseIE):
         },
         'add_ie': ['TwitterSpaces'],
         'params': {'skip_download': 'm3u8'},
+    }, {
+        # URL specifies video number but --yes-playlist
+        'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
+        'playlist_mincount': 2,
+        'info_dict': {
+            'id': '1600649710662213632',
+            'title': 'md5:be05989b0722e114103ed3851a0ffae2',
+            'timestamp': 1670459604.0,
+            'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
+            'comment_count': int,
+            'uploader_id': 'CTVJLaidlaw',
+            'repost_count': int,
+            'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
+            'upload_date': '20221208',
+            'age_limit': 0,
+            'uploader': 'Jocelyn Laidlaw',
+            'uploader_url': 'https://twitter.com/CTVJLaidlaw',
+            'like_count': int,
+        },
+    }, {
+        # URL specifies video number and --no-playlist
+        'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
+        'info_dict': {
+            'id': '1600649511827013632',
+            'ext': 'mp4',
+            'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
+            'thumbnail': r're:^https?://.+\.jpg',
+            'timestamp': 1670459604.0,
+            'uploader_id': 'CTVJLaidlaw',
+            'uploader': 'Jocelyn Laidlaw',
+            'repost_count': int,
+            'comment_count': int,
+            'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
+            'duration': 102.226,
+            'uploader_url': 'https://twitter.com/CTVJLaidlaw',
+            'display_id': '1600649710662213632',
+            'like_count': int,
+            'view_count': int,
+            'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
+            'upload_date': '20221208',
+            'age_limit': 0,
+        },
+        'params': {'noplaylist': True},
+    }, {
+        # The URL id points to a TweetWithVisibilityResults entity, which wraps the actual Tweet
+        # Note that the extracted id differs from the id in the URL
+        'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
+        'info_dict': {
+            'id': '1621117577354424321',
+            'display_id': '1621117700482416640',
+            'ext': 'mp4',
+            'title': '뽀 - 아 최우제 이동속도 봐',
+            'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
+            'duration': 24.598,
+            'uploader': '뽀',
+            'uploader_id': 's2FAKER',
+            'uploader_url': 'https://twitter.com/s2FAKER',
+            'upload_date': '20230202',
+            'timestamp': 1675339553.0,
+            'thumbnail': r're:https?://pbs\.twimg\.com/.+',
+            'age_limit': 18,
+            'tags': [],
+            'like_count': int,
+            'repost_count': int,
+            'comment_count': int,
+            'view_count': int,
+        },
+    }, {
+        'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
+        'info_dict': {
+            'id': '1599108643743473680',
+            'display_id': '1599108751385972737',
+            'ext': 'mp4',
+            'title': '\u06ea - \U0001F48B',
+            'uploader_url': 'https://twitter.com/hlo_again',
+            'like_count': int,
+            'uploader_id': 'hlo_again',
+            'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
+            'repost_count': int,
+            'duration': 9.531,
+            'comment_count': int,
+            'view_count': int,
+            'upload_date': '20221203',
+            'age_limit': 0,
+            'timestamp': 1670092210.0,
+            'tags': [],
+            'uploader': '\u06ea',
+            'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
+        },
+        'params': {'noplaylist': True},
+    }, {
+        'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
+        'info_dict': {
+            'id': '1600009362759733248',
+            'display_id': '1600009574919962625',
+            'ext': 'mp4',
+            'uploader_url': 'https://twitter.com/MunTheShinobi',
+            'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
+            'view_count': int,
+            'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
+            'age_limit': 0,
+            'uploader': 'Mün The Shinobi',
+            'repost_count': int,
+            'upload_date': '20221206',
+            'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
+            'comment_count': int,
+            'like_count': int,
+            'tags': [],
+            'uploader_id': 'MunTheShinobi',
+            'duration': 139.987,
+            'timestamp': 1670306984.0,
+        },
+    }, {
+        # url to retweet id, legacy API
+        'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
+        'info_dict': {
+            'id': '1623274794488659969',
+            'display_id': '1623739803874349067',
+            'ext': 'mp4',
+            'title': 'Johnny Bullets - Me after going viral to over 30million people:    Whoopsie-daisy',
+            'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
+            'uploader': 'Johnny Bullets',
+            'uploader_id': 'Johnnybull3ts',
+            'uploader_url': 'https://twitter.com/Johnnybull3ts',
+            'age_limit': 0,
+            'tags': [],
+            'duration': 8.033,
+            'timestamp': 1675853859.0,
+            'upload_date': '20230208',
+            'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
+            'like_count': int,
+            'repost_count': int,
+            'comment_count': int,
+        },
+        'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
     }, {
         # onion route
         'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
@@ -760,11 +887,16 @@ def _graphql_to_legacy(self, data, twid):
         result = traverse_obj(data, (
             'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
             lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
-            'tweet_results', 'result'
+            'tweet_results', 'result', ('tweet', None),
         ), expected_type=dict, default={}, get_all=False)
 
+        if result.get('__typename') not in ('Tweet', 'TweetTombstone', None):
+            self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)
+
         if 'tombstone' in result:
-            cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
+            cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
+            if cause and 'adult content' in cause:
+                self.raise_login_required(cause)
             raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
 
         status = result.get('legacy', {})
@@ -777,7 +909,7 @@ def _graphql_to_legacy(self, data, twid):
         # extra transformation is needed since result does not match legacy format
         binding_values = {
             binding_value.get('key'): binding_value.get('value')
-            for binding_value in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict)
+            for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
         }
         if binding_values:
             status['card']['binding_values'] = binding_values
@@ -819,20 +951,18 @@ def _build_graphql_query(self, media_id):
         }
 
     def _real_extract(self, url):
-        twid = self._match_id(url)
-        if self.is_logged_in or self._configuration_arg('force_graphql'):
-            self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
-            result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
-            status = self._graphql_to_legacy(result, twid)
-
-        else:
-            status = self._call_api(f'statuses/show/{twid}.json', twid, {
+        twid, selected_index = self._match_valid_url(url).group('id', 'index')
+        if self._configuration_arg('legacy_api') and not self.is_logged_in:
+            status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
                 'cards_platform': 'Web-12',
                 'include_cards': 1,
                 'include_reply_count': 1,
                 'include_user_entities': 0,
                 'tweet_mode': 'extended',
-            })
+            }), 'retweeted_status', None)
+        else:
+            result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
+            status = self._graphql_to_legacy(result, twid)
 
         title = description = status['full_text'].replace('\n', ' ')
         # strip  'https -_t.co_BJYgOjSeGA' junk from filenames
@@ -843,13 +973,6 @@ def _real_extract(self, url):
             title = f'{uploader} - {title}'
         uploader_id = user.get('screen_name')
 
-        tags = []
-        for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
-            hashtag_text = hashtag.get('text')
-            if not hashtag_text:
-                continue
-            tags.append(hashtag_text)
-
         info = {
             'id': twid,
             'title': title,
@@ -862,7 +985,7 @@ def _real_extract(self, url):
             'repost_count': int_or_none(status.get('retweet_count')),
             'comment_count': int_or_none(status.get('reply_count')),
             'age_limit': 18 if status.get('possibly_sensitive') else 0,
-            'tags': tags,
+            'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
         }
 
         def extract_from_video_info(media):
@@ -876,7 +999,6 @@ def extract_from_video_info(media):
                 fmts, subs = self._extract_variant_formats(variant, twid)
                 subtitles = self._merge_subtitles(subtitles, subs)
                 formats.extend(fmts)
-            self._sort_formats(formats, ('res', 'br', 'size', 'proto'))  # The codec of http formats are unknown
 
             thumbnails = []
             media_url = media.get('media_url_https') or media.get('media_url')
@@ -897,7 +1019,10 @@ def add_thumbnail(name, size):
                 'formats': formats,
                 'subtitles': subtitles,
                 'thumbnails': thumbnails,
+                'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
                 'duration': float_or_none(video_info.get('duration_millis'), 1000),
+                # The codecs of http formats are unknown
+                '_format_sort_fields': ('res', 'br', 'size', 'proto'),
             }
 
         def extract_from_card_info(card):
@@ -952,7 +1077,6 @@ def get_binding_value(k):
                 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
                 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
                 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
-                self._sort_formats(formats)
 
                 thumbnails = []
                 for suffix in ('_small', '', '_large', '_x_large', '_original'):
@@ -975,15 +1099,36 @@ def get_binding_value(k):
                         'content_duration_seconds')),
                 }
 
-        media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
-        videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
-        cards = extract_from_card_info(status.get('card'))
-        entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]
+        videos = traverse_obj(status, (
+            (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
+
+        if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
+            selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
+        else:
+            desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
+            if not desired_obj:
+                raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
+            elif desired_obj.get('type') != 'video':
+                raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
+
+            # Restore original archive id and video index in title
+            for index, entry in enumerate(videos, 1):
+                if entry.get('id') != desired_obj.get('id'):
+                    continue
+                if index == 1:
+                    info['_old_archive_ids'] = [make_archive_id(self, twid)]
+                if len(videos) != 1:
+                    info['title'] += f' #{index}'
+                break
+
+            return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
 
+        entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
         if not entries:
             expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
             if not expanded_url or expanded_url == url:
-                raise ExtractorError('No video could be found in this tweet', expected=True)
+                self.raise_no_formats('No video could be found in this tweet', expected=True)
+                return info
 
             return self.url_result(expanded_url, display_id=twid, **info)
 
@@ -1096,7 +1241,6 @@ def _real_extract(self, url):
 class TwitterSpacesIE(TwitterBaseIE):
     IE_NAME = 'twitter:spaces'
     _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
-    _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/'
 
     _TESTS = [{
         'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
@@ -1167,7 +1311,8 @@ def _real_extract(self, url):
             # XXX: Native downloader does not work
             formats = self._extract_m3u8_formats(
                 traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
-                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live')
+                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
+                headers={'Referer': 'https://twitter.com/'})
             for fmt in formats:
                 fmt.update({'vcodec': 'none', 'acodec': 'aac'})