]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[cleanup] Misc (#10075)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
e897bd82 1import functools
7a26ce26 2import json
a006ce2b 3import random
23e7cba8 4import re
add96eb9 5import urllib.parse
23e7cba8
S
6
7from .common import InfoExtractor
13b2ae29 8from .periscope import PeriscopeBaseIE, PeriscopeIE
116c2684 9from ..networking.exceptions import HTTPError
23e7cba8 10from ..utils import (
2edfd745 11 ExtractorError,
13b2ae29 12 dict_get,
92315c03 13 filter_dict,
23e7cba8 14 float_or_none,
13b2ae29 15 format_field,
cf5881fc 16 int_or_none,
6aaf96a3 17 join_nonempty,
13b2ae29 18 make_archive_id,
147e62fc 19 remove_end,
13b2ae29
SS
20 str_or_none,
21 strip_or_none,
f1150b9e 22 traverse_obj,
7a26ce26 23 try_call,
2edfd745 24 try_get,
18ca61c5
RA
25 unified_timestamp,
26 update_url_query,
41d1cca3 27 url_or_none,
2edfd745 28 xpath_text,
23e7cba8
S
29)
30
31
class TwitterBaseIE(InfoExtractor):
    """Common base for the Twitter/X extractors.

    Bundles the pieces the concrete extractors share: building format dicts
    from media variants and VMAP documents, guest-token and username/password
    authentication, and thin wrappers around the legacy and GraphQL APIs.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header, see _set_base_headers()
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Token of the login flow in progress; refreshed by every _call_login_api() call
    _flow_token = None

    # Request body that opens the login flow (sent once by _perform_login())
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Turn a single media variant into formats and subtitles.

        @param variant   mapping with a 'url' key and optionally 'bitrate'/'bit_rate'
        @param video_id  ID passed on to the m3u8 downloader
        @returns         (formats, subtitles); ([], {}) if the variant has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only formats without a bitrate: recover it from the format_id
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs
        else:
            # `or None` turns a bitrate of 0 into "unknown"
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': join_nonempty('http', tbr),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract every variant it lists.

        @param vmap_url  URL of the VMAP document; anything url_or_none() rejects yields ([], {})
        @param video_id  ID used for the download and passed to the variant extraction
        @returns         (formats, subtitles)
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is percent-encoded; store it back decoded so that
            # _extract_variant_formats() sees the usable URL
            video_variant.attrib['url'] = urllib.parse.unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also pick up the <MediaFile> URL unless a variant already covered it
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on a_format in place if video_url has a '/<W>x<H>/' path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an 'auth_token' cookie exists for the current API base URL."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    # XXX: Temporary workaround until twitter.com => x.com migration is completed
    def _real_initialize(self):
        """Fall back to the twitter.com API hosts if only twitter.com auth cookies were passed."""
        if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
            return
        # User has not yet been migrated to x.com and has passed twitter.com cookies
        # NOTE: assigned on the base class, so every subclass sees the change
        TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
        TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'

    @functools.cached_property
    def _selected_api(self):
        """First value of the 'api' extractor argument of the Twitter extractor (default 'graphql')."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from guest/activate.json.

        @param display_id  ID used for the download; a falsy value (as passed
                           during login) never selects the legacy bearer token
        @returns           the guest token string
        @raises ExtractorError  if the response carries no string 'guest_token'
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Build the headers shared by all API requests.

        The legacy bearer token is only used when `legacy` is truthy and the
        user is not logged in. 'x-csrf-token' is taken from the 'ct0' cookie.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query={}, data=None):
        """Perform one step of the login flow against onboarding/task.json.

        Stores the returned flow token in self._flow_token.

        @returns  the first 'subtask_id' string listed in the response
        @raises ExtractorError  if the API reports an error, the status is not
                                'success' or no next subtask is returned
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in with username and password by answering the login flow's subtasks.

        Does nothing if an auth_token cookie is already present. Each loop
        iteration answers the subtask the API asked for, until the auth_token
        cookie shows up or a subtask that cannot be answered is requested.
        """
        if self.is_logged_in:
            return

        guest_token = self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://x.com/',
            'Origin': 'https://x.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token of the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs,
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Answer shape shared by all free-text subtasks
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link',
                },
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username,
                                    },
                                },
                            }],
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false',
                        },
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # The flow claims success, yet the loop condition saw no auth_token cookie
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query={}, graphql=False):
        """Call the legacy or the GraphQL API and return the decoded JSON.

        Logged-in sessions send the session headers; otherwise a fresh guest
        token is fetched for the request.

        @param path     path appended to the selected API base URL
        @param graphql  use the GraphQL base URL instead of the legacy one
        @raises ExtractorError  if the response carries a truthy 'errors' entry;
                                a message containing 'not authorized' is reported
                                through raise_login_required() first
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id),
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # De-duplicate while keeping the order the API reported; a set would
            # make the message order (and thus the error text) vary between runs
            errors = ', '.join(dict.fromkeys(traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the mapping of GraphQL query parameters for media_id; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Query a GraphQL endpoint and return the 'data' member of the response.

        Every value returned by _build_graphql_query() is serialized to compact
        JSON and sent as a query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
340
341
class TwitterCardIE(InfoExtractor):
    """Extractor for card and video-player URLs; hands the status ID over to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'},
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to TwitterIE through the equivalent /statuses/<id> URL."""
        tweet_id = self._match_id(url)
        # Nothing is extracted here; the status URL is resolved by TwitterIE
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 466
03879ff0 467
18ca61c5 468class TwitterIE(TwitterBaseIE):
014e8803 469 IE_NAME = 'twitter'
b6795fd3 470 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 471
cf5881fc 472 _TESTS = [{
48aae2d2 473 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 474 'info_dict': {
13b2ae29
SS
475 'id': '643211870443208704',
476 'display_id': '643211948184596480',
f57f84f6 477 'ext': 'mp4',
575036b4 478 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 479 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 480 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
55f18333 481 'channel_id': '549749560',
48aae2d2
YCH
482 'uploader': 'FREE THE NIPPLE',
483 'uploader_id': 'freethenipple',
3b65a6fb 484 'duration': 12.922,
18ca61c5
RA
485 'timestamp': 1442188653,
486 'upload_date': '20150913',
13b2ae29 487 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 488 'comment_count': int,
489 'repost_count': int,
13b2ae29
SS
490 'like_count': int,
491 'tags': [],
492 'age_limit': 18,
1c54a98e 493 '_old_archive_ids': ['twitter 643211948184596480'],
f57f84f6 494 },
55f18333 495 'skip': 'Requires authentication',
cf5881fc
YCH
496 }, {
497 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
498 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
499 'info_dict': {
500 'id': '657991469417025536',
501 'ext': 'mp4',
502 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
503 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 504 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
505 'uploader': 'Gifs',
506 'uploader_id': 'giphz',
507 },
7efc1c2b 508 'expected_warnings': ['height', 'width'],
fc0a45fa 509 'skip': 'Account suspended',
b703ebee
JMF
510 }, {
511 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
512 'info_dict': {
513 'id': '665052190608723968',
13b2ae29 514 'display_id': '665052190608723968',
b703ebee 515 'ext': 'mp4',
b6795fd3 516 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 517 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
55f18333 518 'channel_id': '20106852',
b703ebee 519 'uploader_id': 'starwars',
7a26ce26 520 'uploader': r're:Star Wars.*',
18ca61c5
RA
521 'timestamp': 1447395772,
522 'upload_date': '20151113',
13b2ae29 523 'uploader_url': 'https://twitter.com/starwars',
b03fa783 524 'comment_count': int,
525 'repost_count': int,
13b2ae29
SS
526 'like_count': int,
527 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
528 'age_limit': 0,
1c54a98e 529 '_old_archive_ids': ['twitter 665052190608723968'],
b703ebee 530 },
0ae937a7
YCH
531 }, {
532 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
533 'info_dict': {
534 'id': '705235433198714880',
535 'ext': 'mp4',
18ca61c5
RA
536 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
537 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
538 'uploader_id': 'BTNBrentYarina',
539 'uploader': 'Brent Yarina',
18ca61c5
RA
540 'timestamp': 1456976204,
541 'upload_date': '20160303',
13b2ae29
SS
542 'uploader_url': 'https://twitter.com/BTNBrentYarina',
543 'comment_count': int,
544 'repost_count': int,
545 'like_count': int,
546 'tags': [],
547 'age_limit': 0,
0ae937a7
YCH
548 },
549 'params': {
550 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
551 # Test case of TwitterCardIE
552 'skip_download': True,
553 },
352e7d98 554 'skip': 'Dead external link',
03879ff0
YCH
555 }, {
556 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 557 'info_dict': {
13b2ae29
SS
558 'id': '700207414000242688',
559 'display_id': '700207533655363584',
03879ff0 560 'ext': 'mp4',
13b2ae29 561 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 562 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 563 'thumbnail': r're:^https?://.*\.jpg',
55f18333 564 'channel_id': '1383165541',
13b2ae29
SS
565 'uploader': 'jaydin donte geer',
566 'uploader_id': 'jaydingeer',
3b65a6fb 567 'duration': 30.0,
18ca61c5
RA
568 'timestamp': 1455777459,
569 'upload_date': '20160218',
13b2ae29 570 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 571 'comment_count': int,
572 'repost_count': int,
13b2ae29
SS
573 'like_count': int,
574 'tags': ['Damndaniel'],
575 'age_limit': 0,
1c54a98e 576 '_old_archive_ids': ['twitter 700207533655363584'],
03879ff0 577 },
395fd4b0
YCH
578 }, {
579 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
580 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
581 'info_dict': {
582 'id': 'MIOxnrUteUd',
583 'ext': 'mp4',
18ca61c5
RA
584 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
585 'uploader': 'TAKUMA',
586 'uploader_id': '1004126642786242560',
3615bfe1 587 'timestamp': 1402826626,
395fd4b0 588 'upload_date': '20140615',
13b2ae29
SS
589 'thumbnail': r're:^https?://.*\.jpg',
590 'alt_title': 'Vine by TAKUMA',
591 'comment_count': int,
592 'repost_count': int,
593 'like_count': int,
594 'view_count': int,
395fd4b0
YCH
595 },
596 'add_ie': ['Vine'],
36b7d9db
YCH
597 }, {
598 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 599 'info_dict': {
13b2ae29
SS
600 'id': '717462543795523584',
601 'display_id': '719944021058060289',
36b7d9db
YCH
602 'ext': 'mp4',
603 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5 604 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
55f18333 605 'channel_id': '701615052',
18ca61c5 606 'uploader_id': 'CaptainAmerica',
36b7d9db 607 'uploader': 'Captain America',
3b65a6fb 608 'duration': 3.17,
18ca61c5
RA
609 'timestamp': 1460483005,
610 'upload_date': '20160412',
13b2ae29
SS
611 'uploader_url': 'https://twitter.com/CaptainAmerica',
612 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 613 'comment_count': int,
614 'repost_count': int,
13b2ae29
SS
615 'like_count': int,
616 'tags': [],
617 'age_limit': 0,
1c54a98e 618 '_old_archive_ids': ['twitter 719944021058060289'],
36b7d9db 619 },
f0bc5a86
YCH
620 }, {
621 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
622 'info_dict': {
623 'id': '1zqKVVlkqLaKB',
624 'ext': 'mp4',
18ca61c5 625 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 626 'upload_date': '20160923',
18ca61c5
RA
627 'uploader_id': '1PmKqpJdOJQoY',
628 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 629 'timestamp': 1474613214,
13b2ae29 630 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
631 },
632 'add_ie': ['Periscope'],
1c54a98e 633 'skip': 'Broadcast not found',
2edfd745
YCH
634 }, {
635 # has mp4 formats via mobile API
636 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
637 'info_dict': {
6014355c 638 'id': '852077943283097602',
2edfd745
YCH
639 'ext': 'mp4',
640 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 641 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
55f18333 642 'channel_id': '2526757026',
2edfd745
YCH
643 'uploader': 'عالم الأخبار',
644 'uploader_id': 'news_al3alm',
3b65a6fb 645 'duration': 277.4,
18ca61c5
RA
646 'timestamp': 1492000653,
647 'upload_date': '20170412',
6014355c 648 'display_id': '852138619213144067',
649 'age_limit': 0,
650 'uploader_url': 'https://twitter.com/news_al3alm',
651 'thumbnail': r're:^https?://.*\.jpg',
652 'tags': [],
653 'repost_count': int,
6014355c 654 'like_count': int,
655 'comment_count': int,
1c54a98e 656 '_old_archive_ids': ['twitter 852138619213144067'],
2edfd745 657 },
5c1452e8
GF
658 }, {
659 'url': 'https://twitter.com/i/web/status/910031516746514432',
660 'info_dict': {
13b2ae29
SS
661 'id': '910030238373089285',
662 'display_id': '910031516746514432',
5c1452e8
GF
663 'ext': 'mp4',
664 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
665 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 666 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
55f18333 667 'channel_id': '2319432498',
5c1452e8
GF
668 'uploader': 'Préfet de Guadeloupe',
669 'uploader_id': 'Prefet971',
670 'duration': 47.48,
18ca61c5
RA
671 'timestamp': 1505803395,
672 'upload_date': '20170919',
13b2ae29 673 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 674 'comment_count': int,
675 'repost_count': int,
13b2ae29
SS
676 'like_count': int,
677 'tags': ['Maria'],
678 'age_limit': 0,
1c54a98e 679 '_old_archive_ids': ['twitter 910031516746514432'],
5c1452e8
GF
680 },
681 'params': {
682 'skip_download': True, # requires ffmpeg
683 },
2593725a
S
684 }, {
685 # card via api.twitter.com/1.1/videos/tweet/config
686 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
687 'info_dict': {
13b2ae29
SS
688 'id': '1001551417340022785',
689 'display_id': '1001551623938805763',
2593725a
S
690 'ext': 'mp4',
691 'title': 're:.*?Shep is on a roll today.*?',
692 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 693 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
55f18333 694 'channel_id': '255036353',
2593725a
S
695 'uploader': 'Lis Power',
696 'uploader_id': 'LisPower1',
697 'duration': 111.278,
18ca61c5
RA
698 'timestamp': 1527623489,
699 'upload_date': '20180529',
13b2ae29 700 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 701 'comment_count': int,
702 'repost_count': int,
13b2ae29
SS
703 'like_count': int,
704 'tags': [],
705 'age_limit': 0,
1c54a98e 706 '_old_archive_ids': ['twitter 1001551623938805763'],
2593725a
S
707 },
708 'params': {
709 'skip_download': True, # requires ffmpeg
710 },
b7ef93f0
S
711 }, {
712 'url': 'https://twitter.com/foobar/status/1087791357756956680',
713 'info_dict': {
13b2ae29
SS
714 'id': '1087791272830607360',
715 'display_id': '1087791357756956680',
b7ef93f0 716 'ext': 'mp4',
6014355c 717 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 718 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 719 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 720 'uploader': 'X',
721 'uploader_id': 'X',
b7ef93f0 722 'duration': 61.567,
18ca61c5
RA
723 'timestamp': 1548184644,
724 'upload_date': '20190122',
6014355c 725 'uploader_url': 'https://twitter.com/X',
b03fa783 726 'comment_count': int,
727 'repost_count': int,
13b2ae29 728 'like_count': int,
b03fa783 729 'view_count': int,
13b2ae29
SS
730 'tags': [],
731 'age_limit': 0,
18ca61c5 732 },
a006ce2b 733 'skip': 'This Tweet is unavailable',
18ca61c5
RA
734 }, {
735 # not available in Periscope
736 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
737 'info_dict': {
738 'id': '1vOGwqejwoWxB',
739 'ext': 'mp4',
740 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
741 'uploader': 'Vivi',
742 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
743 'thumbnail': r're:^https?://.*\.jpg',
744 'tags': ['EduTECH2019'],
745 'view_count': int,
b7ef93f0 746 },
18ca61c5 747 'add_ie': ['TwitterBroadcast'],
a006ce2b 748 'skip': 'Broadcast no longer exists',
30a074c2 749 }, {
750 # unified card
751 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
752 'info_dict': {
13b2ae29
SS
753 'id': '1349774757969989634',
754 'display_id': '1349794411333394432',
30a074c2 755 'ext': 'mp4',
756 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
757 'thumbnail': r're:^https?://.*\.jpg',
758 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
55f18333 759 'channel_id': '18552281',
30a074c2 760 'uploader': 'Brooklyn Nets',
761 'uploader_id': 'BrooklynNets',
762 'duration': 324.484,
763 'timestamp': 1610651040,
764 'upload_date': '20210114',
13b2ae29 765 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 766 'comment_count': int,
767 'repost_count': int,
13b2ae29
SS
768 'like_count': int,
769 'tags': [],
770 'age_limit': 0,
1c54a98e 771 '_old_archive_ids': ['twitter 1349794411333394432'],
30a074c2 772 },
773 'params': {
774 'skip_download': True,
775 },
13b2ae29
SS
776 }, {
777 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
778 'info_dict': {
779 'id': '1577855447914409984',
780 'display_id': '1577855540407197696',
781 'ext': 'mp4',
55f18333 782 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
352e7d98 783 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 784 'upload_date': '20221006',
55f18333 785 'channel_id': '143077138',
786 'uploader': 'Oshtru',
13b2ae29
SS
787 'uploader_id': 'oshtru',
788 'uploader_url': 'https://twitter.com/oshtru',
789 'thumbnail': r're:^https?://.*\.jpg',
790 'duration': 30.03,
7a26ce26 791 'timestamp': 1665025050,
b03fa783 792 'comment_count': int,
793 'repost_count': int,
13b2ae29
SS
794 'like_count': int,
795 'tags': [],
796 'age_limit': 0,
1c54a98e 797 '_old_archive_ids': ['twitter 1577855540407197696'],
13b2ae29
SS
798 },
799 'params': {'skip_download': True},
800 }, {
801 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
802 'info_dict': {
803 'id': '1577719286659006464',
55f18333 804 'title': 'Ultima Reload - Test',
13b2ae29 805 'description': 'Test https://t.co/Y3KEZD7Dad',
55f18333 806 'channel_id': '168922496',
807 'uploader': 'Ultima Reload',
13b2ae29
SS
808 'uploader_id': 'UltimaShadowX',
809 'uploader_url': 'https://twitter.com/UltimaShadowX',
810 'upload_date': '20221005',
7a26ce26 811 'timestamp': 1664992565,
b03fa783 812 'comment_count': int,
813 'repost_count': int,
13b2ae29
SS
814 'like_count': int,
815 'tags': [],
816 'age_limit': 0,
817 },
818 'playlist_count': 4,
819 'params': {'skip_download': True},
7a26ce26
SS
820 }, {
821 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
822 'info_dict': {
823 'id': '1575559336759263233',
824 'display_id': '1575560063510810624',
825 'ext': 'mp4',
826 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
827 'thumbnail': r're:^https?://.*\.jpg',
828 'description': 'md5:95aea692fda36a12081b9629b02daa92',
55f18333 829 'channel_id': '1094109584',
7a26ce26
SS
830 'uploader': 'Max Olson',
831 'uploader_id': 'MesoMax919',
832 'uploader_url': 'https://twitter.com/MesoMax919',
833 'duration': 21.321,
834 'timestamp': 1664477766,
835 'upload_date': '20220929',
b03fa783 836 'comment_count': int,
837 'repost_count': int,
7a26ce26
SS
838 'like_count': int,
839 'tags': ['HurricaneIan'],
840 'age_limit': 0,
1c54a98e 841 '_old_archive_ids': ['twitter 1575560063510810624'],
7a26ce26
SS
842 },
843 }, {
a006ce2b 844 # Adult content, fails if not logged in
7a26ce26
SS
845 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
846 'info_dict': {
847 'id': '1575199163847000068',
848 'display_id': '1575199173472927762',
849 'ext': 'mp4',
850 'title': str,
851 'description': str,
55f18333 852 'channel_id': '1217167793541480450',
7a26ce26
SS
853 'uploader': str,
854 'uploader_id': 'Rizdraws',
855 'uploader_url': 'https://twitter.com/Rizdraws',
856 'upload_date': '20220928',
857 'timestamp': 1664391723,
16bed382 858 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
859 'like_count': int,
860 'repost_count': int,
861 'comment_count': int,
862 'age_limit': 18,
55f18333 863 'tags': [],
864 '_old_archive_ids': ['twitter 1575199173472927762'],
7a26ce26 865 },
a006ce2b 866 'params': {'skip_download': 'The media could not be played'},
147e62fc 867 'skip': 'Requires authentication',
7a26ce26 868 }, {
a006ce2b 869 # Playlist result only with graphql API
7a26ce26
SS
870 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
871 'playlist_mincount': 2,
872 'info_dict': {
873 'id': '1395079556562706435',
874 'title': str,
875 'tags': [],
55f18333 876 'channel_id': '21539378',
7a26ce26
SS
877 'uploader': str,
878 'like_count': int,
879 'upload_date': '20210519',
880 'age_limit': 0,
881 'repost_count': int,
147e62fc 882 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
883 'uploader_id': 'Srirachachau',
884 'comment_count': int,
885 'uploader_url': 'https://twitter.com/Srirachachau',
886 'timestamp': 1621447860,
887 },
888 }, {
7a26ce26
SS
889 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
890 'playlist_mincount': 2,
891 'info_dict': {
892 'id': '1578353380363501568',
893 'title': str,
55f18333 894 'channel_id': '2195866214',
7a26ce26
SS
895 'uploader_id': 'DavidToons_',
896 'repost_count': int,
897 'like_count': int,
898 'uploader': str,
899 'timestamp': 1665143744,
900 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 901 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
902 'tags': [],
903 'comment_count': int,
904 'upload_date': '20221007',
905 'age_limit': 0,
906 },
907 }, {
908 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
909 'playlist_count': 2,
910 'info_dict': {
911 'id': '1578401165338976258',
912 'title': str,
913 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
55f18333 914 'channel_id': '19338359',
7a26ce26
SS
915 'uploader': str,
916 'uploader_id': 'primevideouk',
917 'timestamp': 1665155137,
918 'upload_date': '20221007',
919 'age_limit': 0,
920 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 921 'comment_count': int,
922 'repost_count': int,
7a26ce26
SS
923 'like_count': int,
924 'tags': ['TheRingsOfPower'],
925 },
926 }, {
927 # Twitter Spaces
928 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
929 'info_dict': {
930 'id': '1lPJqmBeeNAJb',
931 'ext': 'm4a',
932 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
933 'uploader': r're:Monique Camarra.+?',
934 'uploader_id': 'MoniqueCamarra',
935 'live_status': 'was_live',
1c16d9df 936 'release_timestamp': 1658417414,
a006ce2b 937 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 938 'timestamp': 1658407771,
939 'release_date': '20220721',
940 'upload_date': '20220721',
7a26ce26
SS
941 },
942 'add_ie': ['TwitterSpaces'],
943 'params': {'skip_download': 'm3u8'},
92315c03 944 'skip': 'Requires authentication',
16bed382 945 }, {
946 # URL specifies video number but --yes-playlist
947 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
948 'playlist_mincount': 2,
949 'info_dict': {
950 'id': '1600649710662213632',
951 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
952 'timestamp': 1670459604.0,
953 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 954 'comment_count': int,
16bed382 955 'uploader_id': 'CTVJLaidlaw',
55f18333 956 'channel_id': '80082014',
b03fa783 957 'repost_count': int,
16bed382 958 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
959 'upload_date': '20221208',
960 'age_limit': 0,
961 'uploader': 'Jocelyn Laidlaw',
962 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
963 'like_count': int,
964 },
965 }, {
966 # URL specifies video number and --no-playlist
967 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
968 'info_dict': {
969 'id': '1600649511827013632',
970 'ext': 'mp4',
147e62fc 971 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 972 'thumbnail': r're:^https?://.+\.jpg',
973 'timestamp': 1670459604.0,
55f18333 974 'channel_id': '80082014',
16bed382 975 'uploader_id': 'CTVJLaidlaw',
976 'uploader': 'Jocelyn Laidlaw',
b03fa783 977 'repost_count': int,
978 'comment_count': int,
16bed382 979 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
980 'duration': 102.226,
981 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
982 'display_id': '1600649710662213632',
983 'like_count': int,
984 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
985 'upload_date': '20221208',
986 'age_limit': 0,
1c54a98e 987 '_old_archive_ids': ['twitter 1600649710662213632'],
16bed382 988 },
989 'params': {'noplaylist': True},
7543c9c9 990 }, {
991 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
992 # note the id different between extraction and url
993 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
994 'info_dict': {
995 'id': '1621117577354424321',
996 'display_id': '1621117700482416640',
997 'ext': 'mp4',
998 'title': '뽀 - 아 최우제 이동속도 봐',
999 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
1000 'duration': 24.598,
55f18333 1001 'channel_id': '1281839411068432384',
7543c9c9 1002 'uploader': '뽀',
1003 'uploader_id': 's2FAKER',
1004 'uploader_url': 'https://twitter.com/s2FAKER',
1005 'upload_date': '20230202',
1006 'timestamp': 1675339553.0,
1007 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1008 'age_limit': 18,
1009 'tags': [],
1010 'like_count': int,
b03fa783 1011 'repost_count': int,
1012 'comment_count': int,
1c54a98e 1013 '_old_archive_ids': ['twitter 1621117700482416640'],
7543c9c9 1014 },
55f18333 1015 'skip': 'Requires authentication',
b6795fd3
SS
1016 }, {
1017 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1018 'info_dict': {
1019 'id': '1599108643743473680',
1020 'display_id': '1599108751385972737',
1021 'ext': 'mp4',
1022 'title': '\u06ea - \U0001F48B',
55f18333 1023 'channel_id': '1347791436809441283',
b6795fd3
SS
1024 'uploader_url': 'https://twitter.com/hlo_again',
1025 'like_count': int,
1026 'uploader_id': 'hlo_again',
1027 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 1028 'repost_count': int,
b6795fd3 1029 'duration': 9.531,
b03fa783 1030 'comment_count': int,
b6795fd3
SS
1031 'upload_date': '20221203',
1032 'age_limit': 0,
1033 'timestamp': 1670092210.0,
1034 'tags': [],
1035 'uploader': '\u06ea',
1036 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1c54a98e 1037 '_old_archive_ids': ['twitter 1599108751385972737'],
b6795fd3
SS
1038 },
1039 'params': {'noplaylist': True},
1040 }, {
b6795fd3
SS
1041 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1042 'info_dict': {
1043 'id': '1600009362759733248',
1044 'display_id': '1600009574919962625',
1045 'ext': 'mp4',
55f18333 1046 'channel_id': '211814412',
b6795fd3
SS
1047 'uploader_url': 'https://twitter.com/MunTheShinobi',
1048 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
1049 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1050 'age_limit': 0,
a006ce2b 1051 'uploader': 'Mün',
b03fa783 1052 'repost_count': int,
b6795fd3 1053 'upload_date': '20221206',
a006ce2b 1054 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1055 'comment_count': int,
b6795fd3
SS
1056 'like_count': int,
1057 'tags': [],
1058 'uploader_id': 'MunTheShinobi',
1059 'duration': 139.987,
1060 'timestamp': 1670306984.0,
1c54a98e 1061 '_old_archive_ids': ['twitter 1600009574919962625'],
b6795fd3 1062 },
cf605226 1063 }, {
a006ce2b 1064 # retweeted_status (private)
cf605226 1065 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1066 'info_dict': {
1067 'id': '1623274794488659969',
1068 'display_id': '1623739803874349067',
1069 'ext': 'mp4',
1070 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1071 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1072 'uploader': 'Johnny Bullets',
1073 'uploader_id': 'Johnnybull3ts',
1074 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1075 'age_limit': 0,
1076 'tags': [],
1077 'duration': 8.033,
1078 'timestamp': 1675853859.0,
1079 'upload_date': '20230208',
1080 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1081 'like_count': int,
b03fa783 1082 'repost_count': int,
cf605226 1083 },
6014355c 1084 'skip': 'Protected tweet',
92315c03 1085 }, {
a006ce2b 1086 # retweeted_status
1087 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1088 'info_dict': {
a006ce2b 1089 'id': '1694928337846538240',
92315c03 1090 'ext': 'mp4',
a006ce2b 1091 'display_id': '1695424220702888009',
1092 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1093 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1094 'channel_id': '15212187',
a006ce2b 1095 'uploader': 'Benny Johnson',
1096 'uploader_id': 'bennyjohnson',
1097 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1098 'age_limit': 0,
1099 'tags': [],
a006ce2b 1100 'duration': 45.001,
1101 'timestamp': 1692962814.0,
1102 'upload_date': '20230825',
1103 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1104 'like_count': int,
92315c03 1105 'repost_count': int,
1106 'comment_count': int,
1c54a98e 1107 '_old_archive_ids': ['twitter 1695424220702888009'],
92315c03 1108 },
a006ce2b 1109 }, {
1110 # retweeted_status w/ legacy API
1111 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1112 'info_dict': {
1113 'id': '1694928337846538240',
1114 'ext': 'mp4',
1115 'display_id': '1695424220702888009',
1116 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1117 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1118 'channel_id': '15212187',
a006ce2b 1119 'uploader': 'Benny Johnson',
1120 'uploader_id': 'bennyjohnson',
1121 'uploader_url': 'https://twitter.com/bennyjohnson',
1122 'age_limit': 0,
1123 'tags': [],
1124 'duration': 45.001,
1125 'timestamp': 1692962814.0,
1126 'upload_date': '20230825',
1127 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1128 'like_count': int,
1129 'repost_count': int,
1c54a98e 1130 '_old_archive_ids': ['twitter 1695424220702888009'],
a006ce2b 1131 },
1132 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1133 }, {
1134 # Broadcast embedded in tweet
1c54a98e 1135 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
a006ce2b 1136 'info_dict': {
1c54a98e 1137 'id': '1rmxPMjLzAXKN',
a006ce2b 1138 'ext': 'mp4',
1c54a98e 1139 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
a006ce2b 1140 'uploader': 'Jessica Dobson',
1c54a98e 1141 'uploader_id': 'JessicaDobsonWX',
1142 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1143 'timestamp': 1701566398,
1144 'upload_date': '20231203',
1145 'live_status': 'was_live',
1146 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1147 'concurrent_view_count': int,
a006ce2b 1148 'view_count': int,
1149 },
1150 'add_ie': ['TwitterBroadcast'],
1151 }, {
55f18333 1152 # Animated gif and quote tweet video
a006ce2b 1153 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1154 'playlist_mincount': 2,
1155 'info_dict': {
1156 'id': '1696256659889565950',
1157 'title': 'BAKOON - https://t.co/zom968d0a0',
1158 'description': 'https://t.co/zom968d0a0',
1159 'tags': [],
55f18333 1160 'channel_id': '1263540390',
a006ce2b 1161 'uploader': 'BAKOON',
1162 'uploader_id': 'BAKKOOONN',
1163 'uploader_url': 'https://twitter.com/BAKKOOONN',
1164 'age_limit': 18,
1165 'timestamp': 1693254077.0,
1166 'upload_date': '20230828',
1167 'like_count': int,
55f18333 1168 'comment_count': int,
1169 'repost_count': int,
a006ce2b 1170 },
55f18333 1171 'skip': 'Requires authentication',
1c54a98e 1172 }, {
1173 # "stale tweet" with typename "TweetWithVisibilityResults"
1174 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
55f18333 1175 'md5': '511377ff8dfa7545307084dca4dce319',
1c54a98e 1176 'info_dict': {
1177 'id': '1724883339285544960',
1178 'ext': 'mp4',
1179 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1180 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1181 'display_id': '1724884212803834154',
55f18333 1182 'channel_id': '337808606',
1c54a98e 1183 'uploader': 'Robert F. Kennedy Jr',
1184 'uploader_id': 'RobertKennedyJr',
1185 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1186 'upload_date': '20231115',
1187 'timestamp': 1700079417.0,
1188 'duration': 341.048,
1189 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1190 'tags': ['Kennedy24'],
1191 'repost_count': int,
1192 'like_count': int,
1193 'comment_count': int,
1194 'age_limit': 0,
1195 '_old_archive_ids': ['twitter 1724884212803834154'],
1196 },
4813173e 1197 }, {
1198 # x.com
1199 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1200 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1201 'info_dict': {
1202 'id': '1790637589910654976',
1203 'ext': 'mp4',
1204 'title': 'Historic Vids - One of the most intense moments in history',
1205 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1206 'display_id': '1790637656616943991',
1207 'uploader': 'Historic Vids',
1208 'uploader_id': 'historyinmemes',
1209 'uploader_url': 'https://twitter.com/historyinmemes',
1210 'channel_id': '855481986290524160',
1211 'upload_date': '20240515',
1212 'timestamp': 1715756260.0,
1213 'duration': 15.488,
1214 'tags': [],
1215 'comment_count': int,
1216 'repost_count': int,
1217 'like_count': int,
1218 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1219 'age_limit': 0,
1220 '_old_archive_ids': ['twitter 1790637656616943991'],
add96eb9 1221 },
82fb2357 1222 }, {
1223 # onion route
1224 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1225 'only_matching': True,
18ca61c5
RA
1226 }, {
1227 # Twitch Clip Embed
1228 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1229 'only_matching': True,
10a5091e
RA
1230 }, {
1231 # promo_video_website card
1232 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1233 'only_matching': True,
00dd0cd5 1234 }, {
1235 # promo_video_convo card
1236 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1237 'only_matching': True,
1238 }, {
1239 # appplayer card
1240 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1241 'only_matching': True,
30a074c2 1242 }, {
1243 # video_direct_message card
1244 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1245 'only_matching': True,
1246 }, {
1247 # poll2choice_video card
1248 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1249 'only_matching': True,
1250 }, {
1251 # poll3choice_video card
1252 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1253 'only_matching': True,
1254 }, {
1255 # poll4choice_video card
1256 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1257 'only_matching': True,
cf5881fc 1258 }]
f57f84f6 1259
# Matches a `_video/<digits>/` path segment and captures the digits as group 1;
# `_call_syndication_api` searches video variant URLs with it to derive a media's `id_str`.
a006ce2b 1260 _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1261
@property
def _GRAPHQL_ENDPOINT(self):
    """Return the GraphQL endpoint path, chosen by whether ``self.is_logged_in`` is truthy."""
    logged_in_endpoint = 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
    logged_out_endpoint = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
    return logged_in_endpoint if self.is_logged_in else logged_out_endpoint
1267
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-style status dict.

    The tweet result is located in ``data`` (the lookup path depends on
    ``self.is_logged_in``); tombstoned and unavailable tweets are reported as
    errors. The result's ``legacy`` dict is then updated in place with the
    user, card, quoted status and retweeted status found in the result.

    Raises ExtractorError (expected) for tombstoned or unavailable tweets;
    ``self.raise_login_required`` is called first for the ``NsfwLoggedOut``
    and ``Protected`` reasons.
    """
    if self.is_logged_in:
        focal_tweet_path = (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        )
        result = traverse_obj(data, focal_tweet_path, default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    typename = result.get('__typename')
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    if typename == 'TweetWithVisibilityResults':
        # Result for "stale tweet" needs additional transformation
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    merged_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(merged_fields)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweet_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}
        status['retweeted_status']['user'] = retweet_user

    # Turn the card's list of {key, value} entries into a key -> value mapping
    binding_values = {}
    for entry in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[entry.get('key')] = entry.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1315
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for the tweet ``media_id``.

    Returns a dict with ``variables`` and ``features`` when
    ``self.is_logged_in`` is truthy; otherwise a dict with ``variables``,
    ``features`` and ``fieldToggles``, with the tweet id passed as
    ``tweetId`` instead of ``focalTweetId``.
    """
    if self.is_logged_in:
        variables = {
            'focalTweetId': media_id,
            'includePromotedContent': True,
            'with_rux_injections': False,
            'withBirdwatchNotes': True,
            'withCommunity': True,
            'withDownvotePerspective': False,
            'withQuickPromoteEligibilityTweetFields': True,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withSuperFollowsTweetFields': True,
            'withSuperFollowsUserFields': True,
            'withV2Timeline': True,
            'withVoice': True,
        }
        features = {
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_graphql_timeline_navigation_enabled': False,
            'responsive_web_text_conversations_enabled': False,
            'responsive_web_uc_gql_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'tweetypie_unmention_optimization_enabled': True,
            'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
            'verified_phone_label_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    variables = {
        'tweetId': media_id,
        'withCommunity': False,
        'includePromotedContent': False,
        'withVoice': False,
    }
    features = {
        'creator_subscriptions_tweet_preview_api_enabled': True,
        'tweetypie_unmention_optimization_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
        'view_counts_everywhere_api_enabled': True,
        'longform_notetweets_consumption_enabled': True,
        'responsive_web_twitter_article_tweet_consumption_enabled': False,
        'tweet_awards_web_tipping_enabled': False,
        'freedom_of_speech_not_reach_fetch_enabled': True,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
        'longform_notetweets_rich_text_read_enabled': True,
        'longform_notetweets_inline_media_enabled': True,
        'responsive_web_graphql_exclude_directive_enabled': True,
        'verified_phone_label_enabled': False,
        'responsive_web_media_download_video_enabled': False,
        'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
        'responsive_web_graphql_timeline_navigation_enabled': True,
        'responsive_web_enhance_cards_enabled': False,
    }
    field_toggles = {
        'withArticleRichContentState': False,
    }
    return {'variables': variables, 'features': features, 'fieldToggles': field_toggles}
1380
def _call_syndication_api(self, twid):
    """Fetch the tweet ``twid`` from the syndication endpoint in legacy-like shape.

    Emits a one-time warning that this endpoint is incomplete, downloads the
    JSON, then sets ``id_str`` on every media detail (of the tweet and of its
    ``quoted_tweet``) and stores them under ``extended_entities.media``.

    Raises ExtractorError if the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    token_alphabet = '123456789abcdefghijklmnopqrstuvwxyz'
    query = {
        'id': twid,
        # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
        'token': ''.join(random.choices(token_alphabet, k=10)),
    }
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query=query)
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media_id_path = ('video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1)
    media = []
    for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
        # Fall back to the tweet id when no variant URL contains a media id
        detail['id_str'] = traverse_obj(detail, media_id_path, get_all=False) or twid
        media.append(detail)
    status['extended_entities'] = {'media': media}

    return status
6014355c 1402
def _extract_status(self, twid):
    """Fetch the status dict for tweet ``twid`` using the selected API.

    GraphQL is used when logged in or when ``graphql`` is selected, the legacy
    API when ``legacy`` is selected. An HTTP 429 from either falls back to the
    syndication endpoint, which is also queried whenever ``syndication`` is
    the selected API. If the status carries a ``retweeted_status`` dict, that
    is returned instead of the status itself.

    Raises ExtractorError (expected) for an unknown API selection; other
    extraction errors are re-raised unchanged.
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or self._selected_api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif self._selected_api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as e:
        # Only a rate-limit response (HTTP 429) triggers the fallback
        if isinstance(e.cause, HTTPError) and e.cause.status == 429:
            self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
            status = self._call_syndication_api(twid)
        else:
            raise

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1428
def _real_extract(self, url):
    """Extract the media of a tweet URL.

    Builds the tweet-level metadata from the status, then collects entries
    from the non-photo media of the tweet and its quoted status, and from the
    tweet's card. Depending on the URL-specified media number and the
    playlist choice, returns a single entry, a playlist of entries, or a URL
    result for the first expanded URL when the tweet has no media entries.

    Raises ExtractorError (expected) when the URL-specified media number does
    not refer to an available video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        # Build the per-media fields (id, formats, subtitles, thumbnails, ...) from one media dict
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        # Generator yielding the entries described by the tweet's card; yields nothing without a card
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding value stores its payload under '<type>_value', keyed by its lowercased 'type'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The URL-specified number is 1-based. A number below 1 would become a negative
        # index and silently pick media counting from the end, so treat it as unavailable.
        media_index = int(selected_index) - 1
        desired_obj = None if media_index < 0 else traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media in the tweet itself: hand over to the first expanded URL, if it differs from this one
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1612
1613
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for standalone Amplify video pages served from amp.twimg.com.
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Build the info dict for an Amplify page.

        The page's ``twitter:*`` meta tags supply the VMAP URL, the thumbnail
        and the image/player dimensions. Returns a dict with ``id``,
        ``title``, ``formats`` and ``thumbnails``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        # Only the formats are used here; the second returned value is discarded
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the `twitter:<target>:width` / `:height` meta tags;
            # either value is None when the tag is missing or not numeric
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing it unconditionally
        # would raise a bare IndexError and mask the real "no formats" error
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1669
1670
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for broadcast pages (`/i/broadcasts/<id>`); combines the
    # Twitter API helpers with the Periscope broadcast parsing helpers.
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a broadcast URL.

        Raises ExtractorError when the API reports no data for the broadcast
        id. Upcoming broadcasts are returned without formats.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Twitter-side fields win over whatever the generic parser produced
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            # Nothing to stream yet; hand back the metadata only
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's `type` query parameter (if any) names the format
        query_params = urllib.parse.parse_qs(urllib.parse.urlparse(m3u8_url).query)
        m3u8_id = query_params.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1746
1747
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for audio Spaces (`/i/spaces/<id>`).
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased Space `state` from the API to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` payload for looking up *space_id*."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Return the info dict for a Space URL.

        Requires a logged-in session. Raises ExtractorError when the API
        returns no Space data. Formats are only extracted when the Space is
        live or has a replay available and exposes a media key.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        api_response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = api_response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Missing or unknown states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate fields, in this order
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)
            else:
                formats = []
            for fmt in formats:
                # Spaces are audio-only
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        title = metadata.get('title')
        uploader = traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'name'))
        uploader_id = traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'screen_name'))
        # API timestamps are divided by 1000 (scale=1000) before being returned
        release_timestamp = try_call(
            lambda: int_or_none(metadata['scheduled_start'], scale=1000))
        timestamp = int_or_none(metadata.get('created_at'), scale=1000)

        return {
            'id': space_id,
            'title': title,
            'description': f'Twitter Space participated by {participants}',
            'uploader': uploader,
            'uploader_id': uploader_id,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
            'timestamp': timestamp,
            'formats': formats,
            'http_headers': headers,
        }
1886
1887
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or the internal `tco:<id>` form) to the
    # URL they redirect to.
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever URL it resolves to."""
        match = self._match_valid_url(url)
        explicit_id = match.group('eid')
        if explicit_id:
            # `tco:<id>` form: rebuild the real short URL from the id
            shortcode = explicit_id
            url = self._BASE_URL + shortcode
        else:
            shortcode = match.group('id')

        response = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'})
        new_url = response.url

        # Drop the unsafe-link interstitial so the real target is passed on
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url.replace(unsafe_link_prefix, '')
        return self.url_result(new_url)