jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
Commit | Line | Data
e897bd82 1import functools
7a26ce26 2import json
a006ce2b 3import random
23e7cba8 4import re
add96eb9 5import urllib.parse
23e7cba8
S
6
7from .common import InfoExtractor
13b2ae29 8from .periscope import PeriscopeBaseIE, PeriscopeIE
116c2684 9from ..networking.exceptions import HTTPError
23e7cba8 10from ..utils import (
2edfd745 11 ExtractorError,
13b2ae29 12 dict_get,
92315c03 13 filter_dict,
23e7cba8 14 float_or_none,
13b2ae29 15 format_field,
cf5881fc 16 int_or_none,
13b2ae29 17 make_archive_id,
147e62fc 18 remove_end,
13b2ae29
SS
19 str_or_none,
20 strip_or_none,
f1150b9e 21 traverse_obj,
7a26ce26 22 try_call,
2edfd745 23 try_get,
18ca61c5
RA
24 unified_timestamp,
25 update_url_query,
41d1cca3 26 url_or_none,
2edfd745 27 xpath_text,
23e7cba8
S
28)
29
30
class TwitterBaseIE(InfoExtractor):
    # Shared base for the Twitter/X extractors in this module: API endpoints,
    # bearer tokens, the login flow, guest-token handling and the helpers that
    # turn media variants into format dicts.
    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Set by _call_login_api() after every login step and echoed back by the next one
    _flow_token = None

    # Request body sent with the first login call (flow_name=login), pre-encoded as compact JSON
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        @param variant   Mapping with a 'url' key and optionally 'bitrate'/'bit_rate'
        @param video_id  ID passed through to the m3u8 extraction helper
        @returns         (formats, subtitles); both empty when the variant has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only formats without a bitrate: recover it from the format_id
            # when that looks like "hls-audio-<digits>" (at least 4 digits)
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    # NOTE(review): the 1000 is presumably a bits/s -> kbit/s scale; confirm against int_or_none
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs
        else:
            # `or None` turns a zero bitrate into "unknown" so the format_id stays plain 'http'
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + (f'-{tbr}' if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract formats from its video variants.

        @param vmap_url  URL of the VMAP document; anything url_or_none() rejects yields no formats
        @param video_id  ID used for the download and passed to the variant helper
        @returns         (formats, subtitles)
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded; decode it in place before use
            video_variant.attrib['url'] = urllib.parse.unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also consider the <MediaFile> URL unless it duplicates a variant already handled.
        # A missing MediaFile gives a URL-less variant, which yields no formats.
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on a_format in place when video_url contains a "/<W>x<H>/" path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """True when the cookies for _API_BASE include a non-empty 'auth_token'."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    # XXX: Temporary workaround until twitter.com => x.com migration is completed
    def _real_initialize(self):
        """Switch the API endpoints to twitter.com when only twitter.com cookies are present."""
        if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
            return
        # User has not yet been migrated to x.com and has passed twitter.com cookies
        # Assigned on the class itself, so the change applies to every subclass and instance
        TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
        TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'

    @functools.cached_property
    def _selected_api(self):
        """First value of the 'api' extractor argument (ie_key 'Twitter'); defaults to 'graphql'."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from guest/activate.json.

        @param display_id  ID used for the download; None during login
        @returns           The guest token string
        @raises ExtractorError  When the response carries no string 'guest_token'
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            # A falsy display_id short-circuits here, so _selected_api is not read in that case
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Return the base request headers.

        The legacy bearer token is used only when `legacy` is truthy and no
        auth cookie is present. 'x-csrf-token' is taken from the 'ct0' cookie;
        filter_dict() presumably drops it when that cookie is missing.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow against onboarding/task.json.

        Stores the returned flow token in self._flow_token for the next step.

        @param note     Progress message for the download
        @param headers  Request headers
        @param query    Optional query parameters (defaults to none)
        @param data     Encoded request body
        @returns        ID of the first subtask in the response
        @raises ExtractorError  On an API error message, a non-success status
                                or a response without any subtask ID
        """
        # Avoid a shared mutable default; downstream still receives an empty dict
        query = {} if query is None else query
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering the API's login subtasks until an auth cookie is set.

        Does nothing when already logged in. Raises ExtractorError for an
        unrecognized subtask or when the flow reports success without an auth
        cookie. The captcha and rejected-login subtasks go through
        raise_login_required(), pointing the user to cookies.
        """
        if self.is_logged_in:
            return

        guest_token = self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://x.com/',
            'Origin': 'https://x.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token from the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs,
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by the subtasks that take a single text answer
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link',
                },
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username,
                                    },
                                },
                            }],
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false',
                        },
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # The loop only continues while no auth cookie exists, so "success" here means none was set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Call the GraphQL or legacy API and return the parsed JSON.

        @param path      Endpoint path appended to the selected API base
        @param video_id  ID used for the download and for fetching a guest token
        @param query     Optional query parameters (defaults to none)
        @param graphql   Use _GRAPHQL_API_BASE instead of _API_BASE
        @returns         The parsed JSON response
        @raises ExtractorError  When the response contains 'errors'; a message
                                containing 'not authorized' goes through
                                raise_login_required() instead
        """
        # Avoid a shared mutable default; downstream still receives an empty dict
        query = {} if query is None else query
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id),
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # De-duplicate while keeping first-seen order. A set() would make the
            # message order, and which message loses its trailing '.', vary between runs.
            messages = traverse_obj(result, ('errors', ..., 'message', {str}))
            errors = ', '.join(dict.fromkeys(messages))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query data for media_id; subclasses using _call_graphql_api() must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Query a GraphQL endpoint and return the 'data' member of the response.

        Each value from _build_graphql_query() is serialized to compact JSON
        and sent as its own query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
339
340
class TwitterCardIE(InfoExtractor):
    # Handles card and embedded-video URLs by delegating to TwitterIE
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'},
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Hand the numeric ID from the URL over to TwitterIE as a status URL."""
        tweet_id = self._match_id(url)
        # The matched ID is reused both in the status URL and as the result's video ID
        return self.url_result(
            f'https://twitter.com/statuses/{tweet_id}',
            ie=TwitterIE.ie_key(), video_id=tweet_id)
c8398a9b 465
03879ff0 466
18ca61c5 467class TwitterIE(TwitterBaseIE):
014e8803 468 IE_NAME = 'twitter'
b6795fd3 469 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 470
cf5881fc 471 _TESTS = [{
48aae2d2 472 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 473 'info_dict': {
13b2ae29
SS
474 'id': '643211870443208704',
475 'display_id': '643211948184596480',
f57f84f6 476 'ext': 'mp4',
575036b4 477 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 478 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 479 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
55f18333 480 'channel_id': '549749560',
48aae2d2
YCH
481 'uploader': 'FREE THE NIPPLE',
482 'uploader_id': 'freethenipple',
3b65a6fb 483 'duration': 12.922,
18ca61c5
RA
484 'timestamp': 1442188653,
485 'upload_date': '20150913',
13b2ae29 486 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 487 'comment_count': int,
488 'repost_count': int,
13b2ae29
SS
489 'like_count': int,
490 'tags': [],
491 'age_limit': 18,
1c54a98e 492 '_old_archive_ids': ['twitter 643211948184596480'],
f57f84f6 493 },
55f18333 494 'skip': 'Requires authentication',
cf5881fc
YCH
495 }, {
496 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
497 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
498 'info_dict': {
499 'id': '657991469417025536',
500 'ext': 'mp4',
501 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
502 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 503 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
504 'uploader': 'Gifs',
505 'uploader_id': 'giphz',
506 },
7efc1c2b 507 'expected_warnings': ['height', 'width'],
fc0a45fa 508 'skip': 'Account suspended',
b703ebee
JMF
509 }, {
510 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
511 'info_dict': {
512 'id': '665052190608723968',
13b2ae29 513 'display_id': '665052190608723968',
b703ebee 514 'ext': 'mp4',
b6795fd3 515 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 516 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
55f18333 517 'channel_id': '20106852',
b703ebee 518 'uploader_id': 'starwars',
7a26ce26 519 'uploader': r're:Star Wars.*',
18ca61c5
RA
520 'timestamp': 1447395772,
521 'upload_date': '20151113',
13b2ae29 522 'uploader_url': 'https://twitter.com/starwars',
b03fa783 523 'comment_count': int,
524 'repost_count': int,
13b2ae29
SS
525 'like_count': int,
526 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
527 'age_limit': 0,
1c54a98e 528 '_old_archive_ids': ['twitter 665052190608723968'],
b703ebee 529 },
0ae937a7
YCH
530 }, {
531 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
532 'info_dict': {
533 'id': '705235433198714880',
534 'ext': 'mp4',
18ca61c5
RA
535 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
536 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
537 'uploader_id': 'BTNBrentYarina',
538 'uploader': 'Brent Yarina',
18ca61c5
RA
539 'timestamp': 1456976204,
540 'upload_date': '20160303',
13b2ae29
SS
541 'uploader_url': 'https://twitter.com/BTNBrentYarina',
542 'comment_count': int,
543 'repost_count': int,
544 'like_count': int,
545 'tags': [],
546 'age_limit': 0,
0ae937a7
YCH
547 },
548 'params': {
549 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
550 # Test case of TwitterCardIE
551 'skip_download': True,
552 },
352e7d98 553 'skip': 'Dead external link',
03879ff0
YCH
554 }, {
555 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 556 'info_dict': {
13b2ae29
SS
557 'id': '700207414000242688',
558 'display_id': '700207533655363584',
03879ff0 559 'ext': 'mp4',
13b2ae29 560 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 561 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 562 'thumbnail': r're:^https?://.*\.jpg',
55f18333 563 'channel_id': '1383165541',
13b2ae29
SS
564 'uploader': 'jaydin donte geer',
565 'uploader_id': 'jaydingeer',
3b65a6fb 566 'duration': 30.0,
18ca61c5
RA
567 'timestamp': 1455777459,
568 'upload_date': '20160218',
13b2ae29 569 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 570 'comment_count': int,
571 'repost_count': int,
13b2ae29
SS
572 'like_count': int,
573 'tags': ['Damndaniel'],
574 'age_limit': 0,
1c54a98e 575 '_old_archive_ids': ['twitter 700207533655363584'],
03879ff0 576 },
395fd4b0
YCH
577 }, {
578 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
579 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
580 'info_dict': {
581 'id': 'MIOxnrUteUd',
582 'ext': 'mp4',
18ca61c5
RA
583 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
584 'uploader': 'TAKUMA',
585 'uploader_id': '1004126642786242560',
3615bfe1 586 'timestamp': 1402826626,
395fd4b0 587 'upload_date': '20140615',
13b2ae29
SS
588 'thumbnail': r're:^https?://.*\.jpg',
589 'alt_title': 'Vine by TAKUMA',
590 'comment_count': int,
591 'repost_count': int,
592 'like_count': int,
593 'view_count': int,
395fd4b0
YCH
594 },
595 'add_ie': ['Vine'],
36b7d9db
YCH
596 }, {
597 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 598 'info_dict': {
13b2ae29
SS
599 'id': '717462543795523584',
600 'display_id': '719944021058060289',
36b7d9db
YCH
601 'ext': 'mp4',
602 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5 603 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
55f18333 604 'channel_id': '701615052',
18ca61c5 605 'uploader_id': 'CaptainAmerica',
36b7d9db 606 'uploader': 'Captain America',
3b65a6fb 607 'duration': 3.17,
18ca61c5
RA
608 'timestamp': 1460483005,
609 'upload_date': '20160412',
13b2ae29
SS
610 'uploader_url': 'https://twitter.com/CaptainAmerica',
611 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 612 'comment_count': int,
613 'repost_count': int,
13b2ae29
SS
614 'like_count': int,
615 'tags': [],
616 'age_limit': 0,
1c54a98e 617 '_old_archive_ids': ['twitter 719944021058060289'],
36b7d9db 618 },
f0bc5a86
YCH
619 }, {
620 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
621 'info_dict': {
622 'id': '1zqKVVlkqLaKB',
623 'ext': 'mp4',
18ca61c5 624 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 625 'upload_date': '20160923',
18ca61c5
RA
626 'uploader_id': '1PmKqpJdOJQoY',
627 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 628 'timestamp': 1474613214,
13b2ae29 629 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
630 },
631 'add_ie': ['Periscope'],
1c54a98e 632 'skip': 'Broadcast not found',
2edfd745
YCH
633 }, {
634 # has mp4 formats via mobile API
635 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
636 'info_dict': {
6014355c 637 'id': '852077943283097602',
2edfd745
YCH
638 'ext': 'mp4',
639 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 640 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
55f18333 641 'channel_id': '2526757026',
2edfd745
YCH
642 'uploader': 'عالم الأخبار',
643 'uploader_id': 'news_al3alm',
3b65a6fb 644 'duration': 277.4,
18ca61c5
RA
645 'timestamp': 1492000653,
646 'upload_date': '20170412',
6014355c 647 'display_id': '852138619213144067',
648 'age_limit': 0,
649 'uploader_url': 'https://twitter.com/news_al3alm',
650 'thumbnail': r're:^https?://.*\.jpg',
651 'tags': [],
652 'repost_count': int,
6014355c 653 'like_count': int,
654 'comment_count': int,
1c54a98e 655 '_old_archive_ids': ['twitter 852138619213144067'],
2edfd745 656 },
5c1452e8
GF
657 }, {
658 'url': 'https://twitter.com/i/web/status/910031516746514432',
659 'info_dict': {
13b2ae29
SS
660 'id': '910030238373089285',
661 'display_id': '910031516746514432',
5c1452e8
GF
662 'ext': 'mp4',
663 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
664 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 665 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
55f18333 666 'channel_id': '2319432498',
5c1452e8
GF
667 'uploader': 'Préfet de Guadeloupe',
668 'uploader_id': 'Prefet971',
669 'duration': 47.48,
18ca61c5
RA
670 'timestamp': 1505803395,
671 'upload_date': '20170919',
13b2ae29 672 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 673 'comment_count': int,
674 'repost_count': int,
13b2ae29
SS
675 'like_count': int,
676 'tags': ['Maria'],
677 'age_limit': 0,
1c54a98e 678 '_old_archive_ids': ['twitter 910031516746514432'],
5c1452e8
GF
679 },
680 'params': {
681 'skip_download': True, # requires ffmpeg
682 },
2593725a
S
683 }, {
684 # card via api.twitter.com/1.1/videos/tweet/config
685 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
686 'info_dict': {
13b2ae29
SS
687 'id': '1001551417340022785',
688 'display_id': '1001551623938805763',
2593725a
S
689 'ext': 'mp4',
690 'title': 're:.*?Shep is on a roll today.*?',
691 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 692 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
55f18333 693 'channel_id': '255036353',
2593725a
S
694 'uploader': 'Lis Power',
695 'uploader_id': 'LisPower1',
696 'duration': 111.278,
18ca61c5
RA
697 'timestamp': 1527623489,
698 'upload_date': '20180529',
13b2ae29 699 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 700 'comment_count': int,
701 'repost_count': int,
13b2ae29
SS
702 'like_count': int,
703 'tags': [],
704 'age_limit': 0,
1c54a98e 705 '_old_archive_ids': ['twitter 1001551623938805763'],
2593725a
S
706 },
707 'params': {
708 'skip_download': True, # requires ffmpeg
709 },
b7ef93f0
S
710 }, {
711 'url': 'https://twitter.com/foobar/status/1087791357756956680',
712 'info_dict': {
13b2ae29
SS
713 'id': '1087791272830607360',
714 'display_id': '1087791357756956680',
b7ef93f0 715 'ext': 'mp4',
6014355c 716 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 717 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 718 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 719 'uploader': 'X',
720 'uploader_id': 'X',
b7ef93f0 721 'duration': 61.567,
18ca61c5
RA
722 'timestamp': 1548184644,
723 'upload_date': '20190122',
6014355c 724 'uploader_url': 'https://twitter.com/X',
b03fa783 725 'comment_count': int,
726 'repost_count': int,
13b2ae29 727 'like_count': int,
b03fa783 728 'view_count': int,
13b2ae29
SS
729 'tags': [],
730 'age_limit': 0,
18ca61c5 731 },
a006ce2b 732 'skip': 'This Tweet is unavailable',
18ca61c5
RA
733 }, {
734 # not available in Periscope
735 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
736 'info_dict': {
737 'id': '1vOGwqejwoWxB',
738 'ext': 'mp4',
739 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
740 'uploader': 'Vivi',
741 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
742 'thumbnail': r're:^https?://.*\.jpg',
743 'tags': ['EduTECH2019'],
744 'view_count': int,
b7ef93f0 745 },
18ca61c5 746 'add_ie': ['TwitterBroadcast'],
a006ce2b 747 'skip': 'Broadcast no longer exists',
30a074c2 748 }, {
749 # unified card
750 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
751 'info_dict': {
13b2ae29
SS
752 'id': '1349774757969989634',
753 'display_id': '1349794411333394432',
30a074c2 754 'ext': 'mp4',
755 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
756 'thumbnail': r're:^https?://.*\.jpg',
757 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
55f18333 758 'channel_id': '18552281',
30a074c2 759 'uploader': 'Brooklyn Nets',
760 'uploader_id': 'BrooklynNets',
761 'duration': 324.484,
762 'timestamp': 1610651040,
763 'upload_date': '20210114',
13b2ae29 764 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 765 'comment_count': int,
766 'repost_count': int,
13b2ae29
SS
767 'like_count': int,
768 'tags': [],
769 'age_limit': 0,
1c54a98e 770 '_old_archive_ids': ['twitter 1349794411333394432'],
30a074c2 771 },
772 'params': {
773 'skip_download': True,
774 },
13b2ae29
SS
775 }, {
776 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
777 'info_dict': {
778 'id': '1577855447914409984',
779 'display_id': '1577855540407197696',
780 'ext': 'mp4',
55f18333 781 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
352e7d98 782 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 783 'upload_date': '20221006',
55f18333 784 'channel_id': '143077138',
785 'uploader': 'Oshtru',
13b2ae29
SS
786 'uploader_id': 'oshtru',
787 'uploader_url': 'https://twitter.com/oshtru',
788 'thumbnail': r're:^https?://.*\.jpg',
789 'duration': 30.03,
7a26ce26 790 'timestamp': 1665025050,
b03fa783 791 'comment_count': int,
792 'repost_count': int,
13b2ae29
SS
793 'like_count': int,
794 'tags': [],
795 'age_limit': 0,
1c54a98e 796 '_old_archive_ids': ['twitter 1577855540407197696'],
13b2ae29
SS
797 },
798 'params': {'skip_download': True},
799 }, {
800 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
801 'info_dict': {
802 'id': '1577719286659006464',
55f18333 803 'title': 'Ultima Reload - Test',
13b2ae29 804 'description': 'Test https://t.co/Y3KEZD7Dad',
55f18333 805 'channel_id': '168922496',
806 'uploader': 'Ultima Reload',
13b2ae29
SS
807 'uploader_id': 'UltimaShadowX',
808 'uploader_url': 'https://twitter.com/UltimaShadowX',
809 'upload_date': '20221005',
7a26ce26 810 'timestamp': 1664992565,
b03fa783 811 'comment_count': int,
812 'repost_count': int,
13b2ae29
SS
813 'like_count': int,
814 'tags': [],
815 'age_limit': 0,
816 },
817 'playlist_count': 4,
818 'params': {'skip_download': True},
7a26ce26
SS
819 }, {
820 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
821 'info_dict': {
822 'id': '1575559336759263233',
823 'display_id': '1575560063510810624',
824 'ext': 'mp4',
825 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
826 'thumbnail': r're:^https?://.*\.jpg',
827 'description': 'md5:95aea692fda36a12081b9629b02daa92',
55f18333 828 'channel_id': '1094109584',
7a26ce26
SS
829 'uploader': 'Max Olson',
830 'uploader_id': 'MesoMax919',
831 'uploader_url': 'https://twitter.com/MesoMax919',
832 'duration': 21.321,
833 'timestamp': 1664477766,
834 'upload_date': '20220929',
b03fa783 835 'comment_count': int,
836 'repost_count': int,
7a26ce26
SS
837 'like_count': int,
838 'tags': ['HurricaneIan'],
839 'age_limit': 0,
1c54a98e 840 '_old_archive_ids': ['twitter 1575560063510810624'],
7a26ce26
SS
841 },
842 }, {
a006ce2b 843 # Adult content, fails if not logged in
7a26ce26
SS
844 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
845 'info_dict': {
846 'id': '1575199163847000068',
847 'display_id': '1575199173472927762',
848 'ext': 'mp4',
849 'title': str,
850 'description': str,
55f18333 851 'channel_id': '1217167793541480450',
7a26ce26
SS
852 'uploader': str,
853 'uploader_id': 'Rizdraws',
854 'uploader_url': 'https://twitter.com/Rizdraws',
855 'upload_date': '20220928',
856 'timestamp': 1664391723,
16bed382 857 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
858 'like_count': int,
859 'repost_count': int,
860 'comment_count': int,
861 'age_limit': 18,
55f18333 862 'tags': [],
863 '_old_archive_ids': ['twitter 1575199173472927762'],
7a26ce26 864 },
a006ce2b 865 'params': {'skip_download': 'The media could not be played'},
147e62fc 866 'skip': 'Requires authentication',
7a26ce26 867 }, {
a006ce2b 868 # Playlist result only with graphql API
7a26ce26
SS
869 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
870 'playlist_mincount': 2,
871 'info_dict': {
872 'id': '1395079556562706435',
873 'title': str,
874 'tags': [],
55f18333 875 'channel_id': '21539378',
7a26ce26
SS
876 'uploader': str,
877 'like_count': int,
878 'upload_date': '20210519',
879 'age_limit': 0,
880 'repost_count': int,
147e62fc 881 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
882 'uploader_id': 'Srirachachau',
883 'comment_count': int,
884 'uploader_url': 'https://twitter.com/Srirachachau',
885 'timestamp': 1621447860,
886 },
887 }, {
7a26ce26
SS
888 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
889 'playlist_mincount': 2,
890 'info_dict': {
891 'id': '1578353380363501568',
892 'title': str,
55f18333 893 'channel_id': '2195866214',
7a26ce26
SS
894 'uploader_id': 'DavidToons_',
895 'repost_count': int,
896 'like_count': int,
897 'uploader': str,
898 'timestamp': 1665143744,
899 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 900 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
901 'tags': [],
902 'comment_count': int,
903 'upload_date': '20221007',
904 'age_limit': 0,
905 },
906 }, {
907 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
908 'playlist_count': 2,
909 'info_dict': {
910 'id': '1578401165338976258',
911 'title': str,
912 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
55f18333 913 'channel_id': '19338359',
7a26ce26
SS
914 'uploader': str,
915 'uploader_id': 'primevideouk',
916 'timestamp': 1665155137,
917 'upload_date': '20221007',
918 'age_limit': 0,
919 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 920 'comment_count': int,
921 'repost_count': int,
7a26ce26
SS
922 'like_count': int,
923 'tags': ['TheRingsOfPower'],
924 },
925 }, {
926 # Twitter Spaces
927 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
928 'info_dict': {
929 'id': '1lPJqmBeeNAJb',
930 'ext': 'm4a',
931 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
932 'uploader': r're:Monique Camarra.+?',
933 'uploader_id': 'MoniqueCamarra',
934 'live_status': 'was_live',
1c16d9df 935 'release_timestamp': 1658417414,
a006ce2b 936 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 937 'timestamp': 1658407771,
938 'release_date': '20220721',
939 'upload_date': '20220721',
7a26ce26
SS
940 },
941 'add_ie': ['TwitterSpaces'],
942 'params': {'skip_download': 'm3u8'},
92315c03 943 'skip': 'Requires authentication',
16bed382 944 }, {
945 # URL specifies video number but --yes-playlist
946 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
947 'playlist_mincount': 2,
948 'info_dict': {
949 'id': '1600649710662213632',
950 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
951 'timestamp': 1670459604.0,
952 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 953 'comment_count': int,
16bed382 954 'uploader_id': 'CTVJLaidlaw',
55f18333 955 'channel_id': '80082014',
b03fa783 956 'repost_count': int,
16bed382 957 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
958 'upload_date': '20221208',
959 'age_limit': 0,
960 'uploader': 'Jocelyn Laidlaw',
961 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
962 'like_count': int,
963 },
964 }, {
965 # URL specifies video number and --no-playlist
966 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
967 'info_dict': {
968 'id': '1600649511827013632',
969 'ext': 'mp4',
147e62fc 970 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 971 'thumbnail': r're:^https?://.+\.jpg',
972 'timestamp': 1670459604.0,
55f18333 973 'channel_id': '80082014',
16bed382 974 'uploader_id': 'CTVJLaidlaw',
975 'uploader': 'Jocelyn Laidlaw',
b03fa783 976 'repost_count': int,
977 'comment_count': int,
16bed382 978 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
979 'duration': 102.226,
980 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
981 'display_id': '1600649710662213632',
982 'like_count': int,
983 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
984 'upload_date': '20221208',
985 'age_limit': 0,
1c54a98e 986 '_old_archive_ids': ['twitter 1600649710662213632'],
16bed382 987 },
988 'params': {'noplaylist': True},
7543c9c9 989 }, {
990 # id points to a TweetWithVisibilityResults entity, which wraps the actual Tweet
991 # note that the extracted id differs from the id in the URL
992 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
993 'info_dict': {
994 'id': '1621117577354424321',
995 'display_id': '1621117700482416640',
996 'ext': 'mp4',
997 'title': '뽀 - 아 최우제 이동속도 봐',
998 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
999 'duration': 24.598,
55f18333 1000 'channel_id': '1281839411068432384',
7543c9c9 1001 'uploader': '뽀',
1002 'uploader_id': 's2FAKER',
1003 'uploader_url': 'https://twitter.com/s2FAKER',
1004 'upload_date': '20230202',
1005 'timestamp': 1675339553.0,
1006 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1007 'age_limit': 18,
1008 'tags': [],
1009 'like_count': int,
b03fa783 1010 'repost_count': int,
1011 'comment_count': int,
1c54a98e 1012 '_old_archive_ids': ['twitter 1621117700482416640'],
7543c9c9 1013 },
55f18333 1014 'skip': 'Requires authentication',
b6795fd3
SS
1015 }, {
1016 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1017 'info_dict': {
1018 'id': '1599108643743473680',
1019 'display_id': '1599108751385972737',
1020 'ext': 'mp4',
1021 'title': '\u06ea - \U0001F48B',
55f18333 1022 'channel_id': '1347791436809441283',
b6795fd3
SS
1023 'uploader_url': 'https://twitter.com/hlo_again',
1024 'like_count': int,
1025 'uploader_id': 'hlo_again',
1026 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 1027 'repost_count': int,
b6795fd3 1028 'duration': 9.531,
b03fa783 1029 'comment_count': int,
b6795fd3
SS
1030 'upload_date': '20221203',
1031 'age_limit': 0,
1032 'timestamp': 1670092210.0,
1033 'tags': [],
1034 'uploader': '\u06ea',
1035 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1c54a98e 1036 '_old_archive_ids': ['twitter 1599108751385972737'],
b6795fd3
SS
1037 },
1038 'params': {'noplaylist': True},
1039 }, {
b6795fd3
SS
1040 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1041 'info_dict': {
1042 'id': '1600009362759733248',
1043 'display_id': '1600009574919962625',
1044 'ext': 'mp4',
55f18333 1045 'channel_id': '211814412',
b6795fd3
SS
1046 'uploader_url': 'https://twitter.com/MunTheShinobi',
1047 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
1048 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1049 'age_limit': 0,
a006ce2b 1050 'uploader': 'Mün',
b03fa783 1051 'repost_count': int,
b6795fd3 1052 'upload_date': '20221206',
a006ce2b 1053 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1054 'comment_count': int,
b6795fd3
SS
1055 'like_count': int,
1056 'tags': [],
1057 'uploader_id': 'MunTheShinobi',
1058 'duration': 139.987,
1059 'timestamp': 1670306984.0,
1c54a98e 1060 '_old_archive_ids': ['twitter 1600009574919962625'],
b6795fd3 1061 },
cf605226 1062 }, {
a006ce2b 1063 # retweeted_status (private)
cf605226 1064 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1065 'info_dict': {
1066 'id': '1623274794488659969',
1067 'display_id': '1623739803874349067',
1068 'ext': 'mp4',
1069 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1070 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1071 'uploader': 'Johnny Bullets',
1072 'uploader_id': 'Johnnybull3ts',
1073 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1074 'age_limit': 0,
1075 'tags': [],
1076 'duration': 8.033,
1077 'timestamp': 1675853859.0,
1078 'upload_date': '20230208',
1079 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1080 'like_count': int,
b03fa783 1081 'repost_count': int,
cf605226 1082 },
6014355c 1083 'skip': 'Protected tweet',
92315c03 1084 }, {
a006ce2b 1085 # retweeted_status
1086 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1087 'info_dict': {
a006ce2b 1088 'id': '1694928337846538240',
92315c03 1089 'ext': 'mp4',
a006ce2b 1090 'display_id': '1695424220702888009',
1091 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1092 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1093 'channel_id': '15212187',
a006ce2b 1094 'uploader': 'Benny Johnson',
1095 'uploader_id': 'bennyjohnson',
1096 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1097 'age_limit': 0,
1098 'tags': [],
a006ce2b 1099 'duration': 45.001,
1100 'timestamp': 1692962814.0,
1101 'upload_date': '20230825',
1102 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1103 'like_count': int,
92315c03 1104 'repost_count': int,
1105 'comment_count': int,
1c54a98e 1106 '_old_archive_ids': ['twitter 1695424220702888009'],
92315c03 1107 },
a006ce2b 1108 }, {
1109 # retweeted_status w/ legacy API
1110 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1111 'info_dict': {
1112 'id': '1694928337846538240',
1113 'ext': 'mp4',
1114 'display_id': '1695424220702888009',
1115 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1116 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1117 'channel_id': '15212187',
a006ce2b 1118 'uploader': 'Benny Johnson',
1119 'uploader_id': 'bennyjohnson',
1120 'uploader_url': 'https://twitter.com/bennyjohnson',
1121 'age_limit': 0,
1122 'tags': [],
1123 'duration': 45.001,
1124 'timestamp': 1692962814.0,
1125 'upload_date': '20230825',
1126 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1127 'like_count': int,
1128 'repost_count': int,
1c54a98e 1129 '_old_archive_ids': ['twitter 1695424220702888009'],
a006ce2b 1130 },
1131 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1132 }, {
1133 # Broadcast embedded in tweet
1c54a98e 1134 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
a006ce2b 1135 'info_dict': {
1c54a98e 1136 'id': '1rmxPMjLzAXKN',
a006ce2b 1137 'ext': 'mp4',
1c54a98e 1138 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
a006ce2b 1139 'uploader': 'Jessica Dobson',
1c54a98e 1140 'uploader_id': 'JessicaDobsonWX',
1141 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1142 'timestamp': 1701566398,
1143 'upload_date': '20231203',
1144 'live_status': 'was_live',
1145 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1146 'concurrent_view_count': int,
a006ce2b 1147 'view_count': int,
1148 },
1149 'add_ie': ['TwitterBroadcast'],
1150 }, {
55f18333 1151 # Animated gif and quote tweet video
a006ce2b 1152 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1153 'playlist_mincount': 2,
1154 'info_dict': {
1155 'id': '1696256659889565950',
1156 'title': 'BAKOON - https://t.co/zom968d0a0',
1157 'description': 'https://t.co/zom968d0a0',
1158 'tags': [],
55f18333 1159 'channel_id': '1263540390',
a006ce2b 1160 'uploader': 'BAKOON',
1161 'uploader_id': 'BAKKOOONN',
1162 'uploader_url': 'https://twitter.com/BAKKOOONN',
1163 'age_limit': 18,
1164 'timestamp': 1693254077.0,
1165 'upload_date': '20230828',
1166 'like_count': int,
55f18333 1167 'comment_count': int,
1168 'repost_count': int,
a006ce2b 1169 },
55f18333 1170 'skip': 'Requires authentication',
1c54a98e 1171 }, {
1172 # "stale tweet" with typename "TweetWithVisibilityResults"
1173 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
55f18333 1174 'md5': '511377ff8dfa7545307084dca4dce319',
1c54a98e 1175 'info_dict': {
1176 'id': '1724883339285544960',
1177 'ext': 'mp4',
1178 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1179 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1180 'display_id': '1724884212803834154',
55f18333 1181 'channel_id': '337808606',
1c54a98e 1182 'uploader': 'Robert F. Kennedy Jr',
1183 'uploader_id': 'RobertKennedyJr',
1184 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1185 'upload_date': '20231115',
1186 'timestamp': 1700079417.0,
1187 'duration': 341.048,
1188 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1189 'tags': ['Kennedy24'],
1190 'repost_count': int,
1191 'like_count': int,
1192 'comment_count': int,
1193 'age_limit': 0,
1194 '_old_archive_ids': ['twitter 1724884212803834154'],
1195 },
4813173e 1196 }, {
1197 # x.com
1198 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1199 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1200 'info_dict': {
1201 'id': '1790637589910654976',
1202 'ext': 'mp4',
1203 'title': 'Historic Vids - One of the most intense moments in history',
1204 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1205 'display_id': '1790637656616943991',
1206 'uploader': 'Historic Vids',
1207 'uploader_id': 'historyinmemes',
1208 'uploader_url': 'https://twitter.com/historyinmemes',
1209 'channel_id': '855481986290524160',
1210 'upload_date': '20240515',
1211 'timestamp': 1715756260.0,
1212 'duration': 15.488,
1213 'tags': [],
1214 'comment_count': int,
1215 'repost_count': int,
1216 'like_count': int,
1217 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1218 'age_limit': 0,
1219 '_old_archive_ids': ['twitter 1790637656616943991'],
add96eb9 1220 },
82fb2357 1221 }, {
1222 # onion route
1223 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1224 'only_matching': True,
18ca61c5
RA
1225 }, {
1226 # Twitch Clip Embed
1227 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1228 'only_matching': True,
10a5091e
RA
1229 }, {
1230 # promo_video_website card
1231 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1232 'only_matching': True,
00dd0cd5 1233 }, {
1234 # promo_video_convo card
1235 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1236 'only_matching': True,
1237 }, {
1238 # appplayer card
1239 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1240 'only_matching': True,
30a074c2 1241 }, {
1242 # video_direct_message card
1243 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1244 'only_matching': True,
1245 }, {
1246 # poll2choice_video card
1247 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1248 'only_matching': True,
1249 }, {
1250 # poll3choice_video card
1251 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1252 'only_matching': True,
1253 }, {
1254 # poll4choice_video card
1255 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1256 'only_matching': True,
cf5881fc 1257 }]
f57f84f6 1258
# Captures the run of digits between '_video/' and the following '/';
# applied to variant URLs to recover a media id for syndication results
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1260
1261 @property
1262 def _GRAPHQL_ENDPOINT(self):
1263 if self.is_logged_in:
1264 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1265 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1266
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-style status dict.

    The tweet result is looked up in ``data`` according to the login state,
    then its ``legacy`` dict is augmented with ``user``, ``card``,
    ``quoted_status`` and ``retweeted_status`` entries taken from the result.

    Raises ExtractorError when the result carries a tombstone or has the
    typename ``TweetUnavailable``; the ``NsfwLoggedOut`` and ``Protected``
    reasons are reported through ``raise_login_required``.
    """
    if self.is_logged_in:
        tweet_result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        tweet_result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = tweet_result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet_result:
        tombstone_text = traverse_obj(tweet_result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = tweet_result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        tweet_result = traverse_obj(tweet_result, ('tweet', {dict})) or {}

    status = tweet_result.get('legacy', {})
    extra_fields = traverse_obj(tweet_result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweet_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweet_user or {}

    # Flatten the card's list of key/value entries into a plain mapping
    card_bindings = {}
    for entry in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[entry.get('key')] = entry.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
1314
1315 def _build_graphql_query(self, media_id):
1316 return {
1317 'variables': {
1318 'focalTweetId': media_id,
1319 'includePromotedContent': True,
1320 'with_rux_injections': False,
1321 'withBirdwatchNotes': True,
1322 'withCommunity': True,
1323 'withDownvotePerspective': False,
1324 'withQuickPromoteEligibilityTweetFields': True,
1325 'withReactionsMetadata': False,
1326 'withReactionsPerspective': False,
1327 'withSuperFollowsTweetFields': True,
1328 'withSuperFollowsUserFields': True,
1329 'withV2Timeline': True,
1330 'withVoice': True,
1331 },
1332 'features': {
1333 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1334 'interactive_text_enabled': True,
1335 'responsive_web_edit_tweet_api_enabled': True,
1336 'responsive_web_enhance_cards_enabled': True,
1337 'responsive_web_graphql_timeline_navigation_enabled': False,
1338 'responsive_web_text_conversations_enabled': False,
1339 'responsive_web_uc_gql_enabled': True,
1340 'standardized_nudges_misinfo': True,
1341 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1342 'tweetypie_unmention_optimization_enabled': True,
1343 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1344 'verified_phone_label_enabled': False,
1345 'vibe_api_enabled': True,
1346 },
92315c03 1347 } if self.is_logged_in else {
1348 'variables': {
1349 'tweetId': media_id,
1350 'withCommunity': False,
1351 'includePromotedContent': False,
1352 'withVoice': False,
1353 },
1354 'features': {
1355 'creator_subscriptions_tweet_preview_api_enabled': True,
1356 'tweetypie_unmention_optimization_enabled': True,
1357 'responsive_web_edit_tweet_api_enabled': True,
1358 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1359 'view_counts_everywhere_api_enabled': True,
1360 'longform_notetweets_consumption_enabled': True,
1361 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1362 'tweet_awards_web_tipping_enabled': False,
1363 'freedom_of_speech_not_reach_fetch_enabled': True,
1364 'standardized_nudges_misinfo': True,
1365 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1366 'longform_notetweets_rich_text_read_enabled': True,
1367 'longform_notetweets_inline_media_enabled': True,
1368 'responsive_web_graphql_exclude_directive_enabled': True,
1369 'verified_phone_label_enabled': False,
1370 'responsive_web_media_download_video_enabled': False,
1371 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1372 'responsive_web_graphql_timeline_navigation_enabled': True,
add96eb9 1373 'responsive_web_enhance_cards_enabled': False,
92315c03 1374 },
1375 'fieldToggles': {
add96eb9 1376 'withArticleRichContentState': False,
1377 },
7a26ce26
SS
1378 }
1379
def _call_syndication_api(self, twid):
    """Fetch tweet ``twid`` from the syndication endpoint.

    The response is adjusted so that its media live under
    ``extended_entities.media`` and each media dict carries an ``id_str``.

    Raises ExtractorError if the endpoint returns an empty JSON response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    collected_media = []
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    for detail in media_details:
        # Take the id from the first variant URL that matches; otherwise use the tweet id
        url_media_id = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
        detail['id_str'] = url_media_id or twid
        collected_media.append(detail)
    status['extended_entities'] = {'media': collected_media}

    return status
6014355c 1401
def _extract_status(self, twid):
    """Fetch the status dict for tweet ``twid`` through the selected API.

    GraphQL is used when logged in or when selected; the legacy API is used
    when selected. An HTTP 429 from either falls back to the syndication
    endpoint, which is also queried whenever it is the selected API.

    The ``retweeted_status`` entry is preferred over the status itself in
    the returned value; ``{}`` is returned when neither yields a dict.

    Raises ExtractorError for an unknown API selection and re-raises any
    extraction error that is not an HTTP 429.
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or self._selected_api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif self._selected_api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as error:
        rate_limited = isinstance(error.cause, HTTPError) and error.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1427
def _real_extract(self, url):
    """Extract the media attached to the tweet at ``url``.

    Returns:
        - a single info dict when exactly one entry is found, or when a
          URL-specified media index is honoured (no-playlist mode);
        - a playlist result when several entries are found;
        - a ``url_result`` pointing at the tweet's first expanded URL when the
          tweet itself yields no entries and that URL differs from ``url``;
        - the bare metadata dict when nothing could be found and
          ``raise_no_formats`` did not raise.

    Raises:
        ExtractorError: if the URL-specified media index is below 1, is out
            of range, or refers to media that is not a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    # The tweet text (newlines flattened to spaces) seeds both title and description
    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Tweet-level metadata shared by every entry extracted below
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build id, formats, subtitles, thumbnails and duration from one media dict."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            # `or {}` also tolerates an explicit null for 'sizes'
            for name, size in (media.get('sizes') or {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries (url redirects or format dicts) described by a tweet card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is typed; its payload is stored under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value(f'{card_name if is_amplify else "player"}_content_id')
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the tweet it quotes
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # URL indices are 1-based. An index below 1 would become a negative list
        # index and silently pick media counted from the end, so reject it here.
        media_index = int(selected_index) - 1
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}),
            get_all=False) if media_index >= 0 else None
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # Nothing in the tweet itself: defer to the first linked URL, if there is one
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1611
1612
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for videos served from ``amp.twimg.com/v/<uuid>`` pages."""

    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Build the info dict from the page's ``twitter:*`` meta tags.

        The VMAP URL, the thumbnail and the image/player dimensions are all
        read from meta tags of the downloaded webpage.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        # Subtitles returned by the helper are intentionally discarded here
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Reads twitter:<target>:width / twitter:<target>:height
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list: indexing formats[0] would raise
        # a bare IndexError instead of leaving an empty 'formats' to the caller
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1668
1669
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` URLs."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the broadcast, then resolve its m3u8 playlist into formats.

        Raises ExtractorError when the API returns an empty entry for the
        broadcast. Upcoming broadcasts are returned without formats.
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Fields from the broadcast payload take precedence over parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist's "type" query parameter (if any) becomes the format id prefix
        playlist_query = urllib.parse.urlparse(m3u8_url).query
        m3u8_id = urllib.parse.parse_qs(playlist_query).get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1745
1746
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` URLs (audio-only)."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased metadata "state" value to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for the given space id."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Fetch the space's metadata and, when available, its audio formats.

        Requires a logged-in session. Raises ExtractorError when the GraphQL
        response carries no ``audioSpace`` data.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # try_call wraps the lookup, so a missing or unmapped state is not a bare exception here
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate keys, in this order
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)
            for fmt in formats:
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1885
1886
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<code>``) to their target URL."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to ``url_result``."""
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # "tco:<code>" form: rebuild the real t.co URL from the short code
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url

        # Plain local name: a double-underscore name inside a class body is
        # name-mangled for no benefit
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading prefix; str.replace would also remove any
            # later occurrence inside the target URL itself
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)