]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
e897bd82 1import functools
7a26ce26 2import json
a006ce2b 3import random
23e7cba8
S
4import re
5
6from .common import InfoExtractor
13b2ae29 7from .periscope import PeriscopeBaseIE, PeriscopeIE
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
116c2684 13from ..networking.exceptions import HTTPError
23e7cba8 14from ..utils import (
2edfd745 15 ExtractorError,
13b2ae29 16 dict_get,
92315c03 17 filter_dict,
23e7cba8 18 float_or_none,
13b2ae29 19 format_field,
cf5881fc 20 int_or_none,
13b2ae29 21 make_archive_id,
147e62fc 22 remove_end,
13b2ae29
SS
23 str_or_none,
24 strip_or_none,
f1150b9e 25 traverse_obj,
7a26ce26 26 try_call,
2edfd745 27 try_get,
18ca61c5
RA
28 unified_timestamp,
29 update_url_query,
41d1cca3 30 url_or_none,
2edfd745 31 xpath_text,
23e7cba8
S
32)
33
34
class TwitterBaseIE(InfoExtractor):
    """Shared plumbing for the Twitter/X extractors.

    Provides the API endpoints and bearer tokens, guest-token and login
    handling, format extraction helpers for media variants / VMAP documents,
    and thin wrappers around the legacy (1.1) and GraphQL APIs.
    """

    _NETRC_MACHINE = 'twitter'
    # NOTE: both API bases may be re-pointed at twitter.com by _real_initialize()
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header (see _set_base_headers)
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Flow token of the login flow in progress; updated by _call_login_api()
    _flow_token = None

    # Request body that starts the login flow (compact JSON, encoded to bytes)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        @param variant   mapping with a 'url' key and optionally
                         'bitrate' / 'bit_rate'
        @param video_id  ID passed through to the m3u8 downloader
        @returns         (formats, subtitles) tuple; both empty when the
                         variant has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only HLS formats without a bitrate: recover it from a
            # format_id of the form "hls-audio-<digits>" (value scaled by 1000)
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs
        else:
            # A zero bitrate is treated the same as a missing one
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': f'http-{tbr}' if tbr else 'http',
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract its formats.

        Every videoVariant element is processed; the MediaFile URL is added
        as well unless it was already seen among the variants.

        @returns  (formats, subtitles) tuple; both empty for an invalid URL
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded; decode it in place so the
            # attribute mapping can be handed to _extract_variant_formats()
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # Nothing to add when MediaFile is absent/empty or duplicates a variant
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on a_format from a "/<W>x<H>/" URL path segment.

        a_format is updated in place and left untouched when no such segment
        is found. Nothing is returned.
        """
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an 'auth_token' cookie is present for the current _API_BASE."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    # XXX: Temporary workaround until twitter.com => x.com migration is completed
    def _real_initialize(self):
        """Switch the API bases to twitter.com when only twitter.com cookies exist."""
        if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
            return
        # User has not yet been migrated to x.com and has passed twitter.com cookies
        # NOTE: assigned on the base class, so every subclass sees the change
        TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
        TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'

    @functools.cached_property
    def _selected_api(self):
        """First value of the 'api' extractor argument (ie_key 'Twitter'); default 'graphql'."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from the API.

        @param display_id  ID used for progress output; when falsy the legacy
                           bearer token is never selected
        @raises ExtractorError  if the response carries no string 'guest_token'
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Return the base request headers.

        The legacy bearer token is used only when `legacy` is truthy and the
        user is not logged in. 'x-csrf-token' is taken from the 'ct0' cookie
        and is dropped from the result when that cookie is unavailable.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow.

        Stores the returned flow token in self._flow_token.

        @param query  optional query parameters (None means no parameters)
        @returns      the ID of the next login subtask
        @raises ExtractorError  if the API reports an error, the status is not
                                'success', or no subtask ID is returned
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering login subtasks until an auth token cookie exists.

        Returns immediately when already logged in. Captcha and rejected
        logins are reported via raise_login_required(); any other unhandled
        subtask raises ExtractorError.
        """
        if self.is_logged_in:
            return

        guest_token = self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://x.com/',
            'Origin': 'https://x.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, so each request carries the
            # token stored by the previous _call_login_api() call
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by the free-text subtasks
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Still inside the loop, i.e. no auth_token cookie was set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or GraphQL API and return the decoded JSON.

        @param path     endpoint path appended to the selected API base
        @param query    optional query parameters (None means no parameters)
        @param graphql  use the GraphQL base instead of the legacy 1.1 base
        @raises ExtractorError  if the response contains an 'errors' entry;
                                "not authorized" errors are reported through
                                raise_login_required() instead
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # De-duplicate while keeping the API's order so that the reported
            # message is deterministic
            errors = ', '.join(dict.fromkeys(
                traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query mapping for media_id; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the 'data' member of the response.

        Each value of the mapping built by _build_graphql_query() is
        serialized to compact JSON before being sent as a query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
343
344
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card / embedded-video URLs.

    It does no extraction itself: the numeric ID from the URL is turned into
    a status URL which is delegated to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the card's numeric ID over to TwitterIE as a status URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 469
03879ff0 470
18ca61c5 471class TwitterIE(TwitterBaseIE):
014e8803 472 IE_NAME = 'twitter'
b6795fd3 473 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 474
cf5881fc 475 _TESTS = [{
48aae2d2 476 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 477 'info_dict': {
13b2ae29
SS
478 'id': '643211870443208704',
479 'display_id': '643211948184596480',
f57f84f6 480 'ext': 'mp4',
575036b4 481 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 482 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 483 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
55f18333 484 'channel_id': '549749560',
48aae2d2
YCH
485 'uploader': 'FREE THE NIPPLE',
486 'uploader_id': 'freethenipple',
3b65a6fb 487 'duration': 12.922,
18ca61c5
RA
488 'timestamp': 1442188653,
489 'upload_date': '20150913',
13b2ae29 490 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 491 'comment_count': int,
492 'repost_count': int,
13b2ae29
SS
493 'like_count': int,
494 'tags': [],
495 'age_limit': 18,
1c54a98e 496 '_old_archive_ids': ['twitter 643211948184596480'],
f57f84f6 497 },
55f18333 498 'skip': 'Requires authentication',
cf5881fc
YCH
499 }, {
500 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
501 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
502 'info_dict': {
503 'id': '657991469417025536',
504 'ext': 'mp4',
505 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
506 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 507 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
508 'uploader': 'Gifs',
509 'uploader_id': 'giphz',
510 },
7efc1c2b 511 'expected_warnings': ['height', 'width'],
fc0a45fa 512 'skip': 'Account suspended',
b703ebee
JMF
513 }, {
514 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
515 'info_dict': {
516 'id': '665052190608723968',
13b2ae29 517 'display_id': '665052190608723968',
b703ebee 518 'ext': 'mp4',
b6795fd3 519 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 520 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
55f18333 521 'channel_id': '20106852',
b703ebee 522 'uploader_id': 'starwars',
7a26ce26 523 'uploader': r're:Star Wars.*',
18ca61c5
RA
524 'timestamp': 1447395772,
525 'upload_date': '20151113',
13b2ae29 526 'uploader_url': 'https://twitter.com/starwars',
b03fa783 527 'comment_count': int,
528 'repost_count': int,
13b2ae29
SS
529 'like_count': int,
530 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
531 'age_limit': 0,
1c54a98e 532 '_old_archive_ids': ['twitter 665052190608723968'],
b703ebee 533 },
0ae937a7
YCH
534 }, {
535 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
536 'info_dict': {
537 'id': '705235433198714880',
538 'ext': 'mp4',
18ca61c5
RA
539 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
540 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
541 'uploader_id': 'BTNBrentYarina',
542 'uploader': 'Brent Yarina',
18ca61c5
RA
543 'timestamp': 1456976204,
544 'upload_date': '20160303',
13b2ae29
SS
545 'uploader_url': 'https://twitter.com/BTNBrentYarina',
546 'comment_count': int,
547 'repost_count': int,
548 'like_count': int,
549 'tags': [],
550 'age_limit': 0,
0ae937a7
YCH
551 },
552 'params': {
553 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
554 # Test case of TwitterCardIE
555 'skip_download': True,
556 },
352e7d98 557 'skip': 'Dead external link',
03879ff0
YCH
558 }, {
559 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 560 'info_dict': {
13b2ae29
SS
561 'id': '700207414000242688',
562 'display_id': '700207533655363584',
03879ff0 563 'ext': 'mp4',
13b2ae29 564 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 565 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 566 'thumbnail': r're:^https?://.*\.jpg',
55f18333 567 'channel_id': '1383165541',
13b2ae29
SS
568 'uploader': 'jaydin donte geer',
569 'uploader_id': 'jaydingeer',
3b65a6fb 570 'duration': 30.0,
18ca61c5
RA
571 'timestamp': 1455777459,
572 'upload_date': '20160218',
13b2ae29 573 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 574 'comment_count': int,
575 'repost_count': int,
13b2ae29
SS
576 'like_count': int,
577 'tags': ['Damndaniel'],
578 'age_limit': 0,
1c54a98e 579 '_old_archive_ids': ['twitter 700207533655363584'],
03879ff0 580 },
395fd4b0
YCH
581 }, {
582 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
583 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
584 'info_dict': {
585 'id': 'MIOxnrUteUd',
586 'ext': 'mp4',
18ca61c5
RA
587 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
588 'uploader': 'TAKUMA',
589 'uploader_id': '1004126642786242560',
3615bfe1 590 'timestamp': 1402826626,
395fd4b0 591 'upload_date': '20140615',
13b2ae29
SS
592 'thumbnail': r're:^https?://.*\.jpg',
593 'alt_title': 'Vine by TAKUMA',
594 'comment_count': int,
595 'repost_count': int,
596 'like_count': int,
597 'view_count': int,
395fd4b0
YCH
598 },
599 'add_ie': ['Vine'],
36b7d9db
YCH
600 }, {
601 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 602 'info_dict': {
13b2ae29
SS
603 'id': '717462543795523584',
604 'display_id': '719944021058060289',
36b7d9db
YCH
605 'ext': 'mp4',
606 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5 607 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
55f18333 608 'channel_id': '701615052',
18ca61c5 609 'uploader_id': 'CaptainAmerica',
36b7d9db 610 'uploader': 'Captain America',
3b65a6fb 611 'duration': 3.17,
18ca61c5
RA
612 'timestamp': 1460483005,
613 'upload_date': '20160412',
13b2ae29
SS
614 'uploader_url': 'https://twitter.com/CaptainAmerica',
615 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 616 'comment_count': int,
617 'repost_count': int,
13b2ae29
SS
618 'like_count': int,
619 'tags': [],
620 'age_limit': 0,
1c54a98e 621 '_old_archive_ids': ['twitter 719944021058060289'],
36b7d9db 622 },
f0bc5a86
YCH
623 }, {
624 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
625 'info_dict': {
626 'id': '1zqKVVlkqLaKB',
627 'ext': 'mp4',
18ca61c5 628 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 629 'upload_date': '20160923',
18ca61c5
RA
630 'uploader_id': '1PmKqpJdOJQoY',
631 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 632 'timestamp': 1474613214,
13b2ae29 633 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
634 },
635 'add_ie': ['Periscope'],
1c54a98e 636 'skip': 'Broadcast not found',
2edfd745
YCH
637 }, {
638 # has mp4 formats via mobile API
639 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
640 'info_dict': {
6014355c 641 'id': '852077943283097602',
2edfd745
YCH
642 'ext': 'mp4',
643 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 644 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
55f18333 645 'channel_id': '2526757026',
2edfd745
YCH
646 'uploader': 'عالم الأخبار',
647 'uploader_id': 'news_al3alm',
3b65a6fb 648 'duration': 277.4,
18ca61c5
RA
649 'timestamp': 1492000653,
650 'upload_date': '20170412',
6014355c 651 'display_id': '852138619213144067',
652 'age_limit': 0,
653 'uploader_url': 'https://twitter.com/news_al3alm',
654 'thumbnail': r're:^https?://.*\.jpg',
655 'tags': [],
656 'repost_count': int,
6014355c 657 'like_count': int,
658 'comment_count': int,
1c54a98e 659 '_old_archive_ids': ['twitter 852138619213144067'],
2edfd745 660 },
5c1452e8
GF
661 }, {
662 'url': 'https://twitter.com/i/web/status/910031516746514432',
663 'info_dict': {
13b2ae29
SS
664 'id': '910030238373089285',
665 'display_id': '910031516746514432',
5c1452e8
GF
666 'ext': 'mp4',
667 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
668 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 669 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
55f18333 670 'channel_id': '2319432498',
5c1452e8
GF
671 'uploader': 'Préfet de Guadeloupe',
672 'uploader_id': 'Prefet971',
673 'duration': 47.48,
18ca61c5
RA
674 'timestamp': 1505803395,
675 'upload_date': '20170919',
13b2ae29 676 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 677 'comment_count': int,
678 'repost_count': int,
13b2ae29
SS
679 'like_count': int,
680 'tags': ['Maria'],
681 'age_limit': 0,
1c54a98e 682 '_old_archive_ids': ['twitter 910031516746514432'],
5c1452e8
GF
683 },
684 'params': {
685 'skip_download': True, # requires ffmpeg
686 },
2593725a
S
687 }, {
688 # card via api.twitter.com/1.1/videos/tweet/config
689 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
690 'info_dict': {
13b2ae29
SS
691 'id': '1001551417340022785',
692 'display_id': '1001551623938805763',
2593725a
S
693 'ext': 'mp4',
694 'title': 're:.*?Shep is on a roll today.*?',
695 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 696 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
55f18333 697 'channel_id': '255036353',
2593725a
S
698 'uploader': 'Lis Power',
699 'uploader_id': 'LisPower1',
700 'duration': 111.278,
18ca61c5
RA
701 'timestamp': 1527623489,
702 'upload_date': '20180529',
13b2ae29 703 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 704 'comment_count': int,
705 'repost_count': int,
13b2ae29
SS
706 'like_count': int,
707 'tags': [],
708 'age_limit': 0,
1c54a98e 709 '_old_archive_ids': ['twitter 1001551623938805763'],
2593725a
S
710 },
711 'params': {
712 'skip_download': True, # requires ffmpeg
713 },
b7ef93f0
S
714 }, {
715 'url': 'https://twitter.com/foobar/status/1087791357756956680',
716 'info_dict': {
13b2ae29
SS
717 'id': '1087791272830607360',
718 'display_id': '1087791357756956680',
b7ef93f0 719 'ext': 'mp4',
6014355c 720 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 721 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 722 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 723 'uploader': 'X',
724 'uploader_id': 'X',
b7ef93f0 725 'duration': 61.567,
18ca61c5
RA
726 'timestamp': 1548184644,
727 'upload_date': '20190122',
6014355c 728 'uploader_url': 'https://twitter.com/X',
b03fa783 729 'comment_count': int,
730 'repost_count': int,
13b2ae29 731 'like_count': int,
b03fa783 732 'view_count': int,
13b2ae29
SS
733 'tags': [],
734 'age_limit': 0,
18ca61c5 735 },
a006ce2b 736 'skip': 'This Tweet is unavailable',
18ca61c5
RA
737 }, {
738 # not available in Periscope
739 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
740 'info_dict': {
741 'id': '1vOGwqejwoWxB',
742 'ext': 'mp4',
743 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
744 'uploader': 'Vivi',
745 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
746 'thumbnail': r're:^https?://.*\.jpg',
747 'tags': ['EduTECH2019'],
748 'view_count': int,
b7ef93f0 749 },
18ca61c5 750 'add_ie': ['TwitterBroadcast'],
a006ce2b 751 'skip': 'Broadcast no longer exists',
30a074c2 752 }, {
753 # unified card
754 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
755 'info_dict': {
13b2ae29
SS
756 'id': '1349774757969989634',
757 'display_id': '1349794411333394432',
30a074c2 758 'ext': 'mp4',
759 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
760 'thumbnail': r're:^https?://.*\.jpg',
761 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
55f18333 762 'channel_id': '18552281',
30a074c2 763 'uploader': 'Brooklyn Nets',
764 'uploader_id': 'BrooklynNets',
765 'duration': 324.484,
766 'timestamp': 1610651040,
767 'upload_date': '20210114',
13b2ae29 768 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 769 'comment_count': int,
770 'repost_count': int,
13b2ae29
SS
771 'like_count': int,
772 'tags': [],
773 'age_limit': 0,
1c54a98e 774 '_old_archive_ids': ['twitter 1349794411333394432'],
30a074c2 775 },
776 'params': {
777 'skip_download': True,
778 },
13b2ae29
SS
779 }, {
780 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
781 'info_dict': {
782 'id': '1577855447914409984',
783 'display_id': '1577855540407197696',
784 'ext': 'mp4',
55f18333 785 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
352e7d98 786 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 787 'upload_date': '20221006',
55f18333 788 'channel_id': '143077138',
789 'uploader': 'Oshtru',
13b2ae29
SS
790 'uploader_id': 'oshtru',
791 'uploader_url': 'https://twitter.com/oshtru',
792 'thumbnail': r're:^https?://.*\.jpg',
793 'duration': 30.03,
7a26ce26 794 'timestamp': 1665025050,
b03fa783 795 'comment_count': int,
796 'repost_count': int,
13b2ae29
SS
797 'like_count': int,
798 'tags': [],
799 'age_limit': 0,
1c54a98e 800 '_old_archive_ids': ['twitter 1577855540407197696'],
13b2ae29
SS
801 },
802 'params': {'skip_download': True},
803 }, {
804 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
805 'info_dict': {
806 'id': '1577719286659006464',
55f18333 807 'title': 'Ultima Reload - Test',
13b2ae29 808 'description': 'Test https://t.co/Y3KEZD7Dad',
55f18333 809 'channel_id': '168922496',
810 'uploader': 'Ultima Reload',
13b2ae29
SS
811 'uploader_id': 'UltimaShadowX',
812 'uploader_url': 'https://twitter.com/UltimaShadowX',
813 'upload_date': '20221005',
7a26ce26 814 'timestamp': 1664992565,
b03fa783 815 'comment_count': int,
816 'repost_count': int,
13b2ae29
SS
817 'like_count': int,
818 'tags': [],
819 'age_limit': 0,
820 },
821 'playlist_count': 4,
822 'params': {'skip_download': True},
7a26ce26
SS
823 }, {
824 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
825 'info_dict': {
826 'id': '1575559336759263233',
827 'display_id': '1575560063510810624',
828 'ext': 'mp4',
829 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
830 'thumbnail': r're:^https?://.*\.jpg',
831 'description': 'md5:95aea692fda36a12081b9629b02daa92',
55f18333 832 'channel_id': '1094109584',
7a26ce26
SS
833 'uploader': 'Max Olson',
834 'uploader_id': 'MesoMax919',
835 'uploader_url': 'https://twitter.com/MesoMax919',
836 'duration': 21.321,
837 'timestamp': 1664477766,
838 'upload_date': '20220929',
b03fa783 839 'comment_count': int,
840 'repost_count': int,
7a26ce26
SS
841 'like_count': int,
842 'tags': ['HurricaneIan'],
843 'age_limit': 0,
1c54a98e 844 '_old_archive_ids': ['twitter 1575560063510810624'],
7a26ce26
SS
845 },
846 }, {
a006ce2b 847 # Adult content, fails if not logged in
7a26ce26
SS
848 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
849 'info_dict': {
850 'id': '1575199163847000068',
851 'display_id': '1575199173472927762',
852 'ext': 'mp4',
853 'title': str,
854 'description': str,
55f18333 855 'channel_id': '1217167793541480450',
7a26ce26
SS
856 'uploader': str,
857 'uploader_id': 'Rizdraws',
858 'uploader_url': 'https://twitter.com/Rizdraws',
859 'upload_date': '20220928',
860 'timestamp': 1664391723,
16bed382 861 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
862 'like_count': int,
863 'repost_count': int,
864 'comment_count': int,
865 'age_limit': 18,
55f18333 866 'tags': [],
867 '_old_archive_ids': ['twitter 1575199173472927762'],
7a26ce26 868 },
a006ce2b 869 'params': {'skip_download': 'The media could not be played'},
147e62fc 870 'skip': 'Requires authentication',
7a26ce26 871 }, {
a006ce2b 872 # Playlist result only with graphql API
7a26ce26
SS
873 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
874 'playlist_mincount': 2,
875 'info_dict': {
876 'id': '1395079556562706435',
877 'title': str,
878 'tags': [],
55f18333 879 'channel_id': '21539378',
7a26ce26
SS
880 'uploader': str,
881 'like_count': int,
882 'upload_date': '20210519',
883 'age_limit': 0,
884 'repost_count': int,
147e62fc 885 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
886 'uploader_id': 'Srirachachau',
887 'comment_count': int,
888 'uploader_url': 'https://twitter.com/Srirachachau',
889 'timestamp': 1621447860,
890 },
891 }, {
7a26ce26
SS
892 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
893 'playlist_mincount': 2,
894 'info_dict': {
895 'id': '1578353380363501568',
896 'title': str,
55f18333 897 'channel_id': '2195866214',
7a26ce26
SS
898 'uploader_id': 'DavidToons_',
899 'repost_count': int,
900 'like_count': int,
901 'uploader': str,
902 'timestamp': 1665143744,
903 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 904 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
905 'tags': [],
906 'comment_count': int,
907 'upload_date': '20221007',
908 'age_limit': 0,
909 },
910 }, {
911 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
912 'playlist_count': 2,
913 'info_dict': {
914 'id': '1578401165338976258',
915 'title': str,
916 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
55f18333 917 'channel_id': '19338359',
7a26ce26
SS
918 'uploader': str,
919 'uploader_id': 'primevideouk',
920 'timestamp': 1665155137,
921 'upload_date': '20221007',
922 'age_limit': 0,
923 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 924 'comment_count': int,
925 'repost_count': int,
7a26ce26
SS
926 'like_count': int,
927 'tags': ['TheRingsOfPower'],
928 },
929 }, {
930 # Twitter Spaces
931 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
932 'info_dict': {
933 'id': '1lPJqmBeeNAJb',
934 'ext': 'm4a',
935 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
936 'uploader': r're:Monique Camarra.+?',
937 'uploader_id': 'MoniqueCamarra',
938 'live_status': 'was_live',
1c16d9df 939 'release_timestamp': 1658417414,
a006ce2b 940 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 941 'timestamp': 1658407771,
942 'release_date': '20220721',
943 'upload_date': '20220721',
7a26ce26
SS
944 },
945 'add_ie': ['TwitterSpaces'],
946 'params': {'skip_download': 'm3u8'},
92315c03 947 'skip': 'Requires authentication',
16bed382 948 }, {
949 # URL specifies video number but --yes-playlist
950 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
951 'playlist_mincount': 2,
952 'info_dict': {
953 'id': '1600649710662213632',
954 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
955 'timestamp': 1670459604.0,
956 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 957 'comment_count': int,
16bed382 958 'uploader_id': 'CTVJLaidlaw',
55f18333 959 'channel_id': '80082014',
b03fa783 960 'repost_count': int,
16bed382 961 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
962 'upload_date': '20221208',
963 'age_limit': 0,
964 'uploader': 'Jocelyn Laidlaw',
965 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
966 'like_count': int,
967 },
968 }, {
969 # URL specifies video number and --no-playlist
970 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
971 'info_dict': {
972 'id': '1600649511827013632',
973 'ext': 'mp4',
147e62fc 974 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 975 'thumbnail': r're:^https?://.+\.jpg',
976 'timestamp': 1670459604.0,
55f18333 977 'channel_id': '80082014',
16bed382 978 'uploader_id': 'CTVJLaidlaw',
979 'uploader': 'Jocelyn Laidlaw',
b03fa783 980 'repost_count': int,
981 'comment_count': int,
16bed382 982 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
983 'duration': 102.226,
984 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
985 'display_id': '1600649710662213632',
986 'like_count': int,
987 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
988 'upload_date': '20221208',
989 'age_limit': 0,
1c54a98e 990 '_old_archive_ids': ['twitter 1600649710662213632'],
16bed382 991 },
992 'params': {'noplaylist': True},
7543c9c9 993 }, {
994 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
995 # note the id different between extraction and url
996 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
997 'info_dict': {
998 'id': '1621117577354424321',
999 'display_id': '1621117700482416640',
1000 'ext': 'mp4',
1001 'title': '뽀 - 아 최우제 이동속도 봐',
1002 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
1003 'duration': 24.598,
55f18333 1004 'channel_id': '1281839411068432384',
7543c9c9 1005 'uploader': '뽀',
1006 'uploader_id': 's2FAKER',
1007 'uploader_url': 'https://twitter.com/s2FAKER',
1008 'upload_date': '20230202',
1009 'timestamp': 1675339553.0,
1010 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1011 'age_limit': 18,
1012 'tags': [],
1013 'like_count': int,
b03fa783 1014 'repost_count': int,
1015 'comment_count': int,
1c54a98e 1016 '_old_archive_ids': ['twitter 1621117700482416640'],
7543c9c9 1017 },
55f18333 1018 'skip': 'Requires authentication',
b6795fd3
SS
1019 }, {
1020 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1021 'info_dict': {
1022 'id': '1599108643743473680',
1023 'display_id': '1599108751385972737',
1024 'ext': 'mp4',
1025 'title': '\u06ea - \U0001F48B',
55f18333 1026 'channel_id': '1347791436809441283',
b6795fd3
SS
1027 'uploader_url': 'https://twitter.com/hlo_again',
1028 'like_count': int,
1029 'uploader_id': 'hlo_again',
1030 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 1031 'repost_count': int,
b6795fd3 1032 'duration': 9.531,
b03fa783 1033 'comment_count': int,
b6795fd3
SS
1034 'upload_date': '20221203',
1035 'age_limit': 0,
1036 'timestamp': 1670092210.0,
1037 'tags': [],
1038 'uploader': '\u06ea',
1039 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1c54a98e 1040 '_old_archive_ids': ['twitter 1599108751385972737'],
b6795fd3
SS
1041 },
1042 'params': {'noplaylist': True},
1043 }, {
b6795fd3
SS
1044 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1045 'info_dict': {
1046 'id': '1600009362759733248',
1047 'display_id': '1600009574919962625',
1048 'ext': 'mp4',
55f18333 1049 'channel_id': '211814412',
b6795fd3
SS
1050 'uploader_url': 'https://twitter.com/MunTheShinobi',
1051 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
1052 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1053 'age_limit': 0,
a006ce2b 1054 'uploader': 'Mün',
b03fa783 1055 'repost_count': int,
b6795fd3 1056 'upload_date': '20221206',
a006ce2b 1057 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1058 'comment_count': int,
b6795fd3
SS
1059 'like_count': int,
1060 'tags': [],
1061 'uploader_id': 'MunTheShinobi',
1062 'duration': 139.987,
1063 'timestamp': 1670306984.0,
1c54a98e 1064 '_old_archive_ids': ['twitter 1600009574919962625'],
b6795fd3 1065 },
cf605226 1066 }, {
a006ce2b 1067 # retweeted_status (private)
cf605226 1068 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1069 'info_dict': {
1070 'id': '1623274794488659969',
1071 'display_id': '1623739803874349067',
1072 'ext': 'mp4',
1073 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1074 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1075 'uploader': 'Johnny Bullets',
1076 'uploader_id': 'Johnnybull3ts',
1077 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1078 'age_limit': 0,
1079 'tags': [],
1080 'duration': 8.033,
1081 'timestamp': 1675853859.0,
1082 'upload_date': '20230208',
1083 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1084 'like_count': int,
b03fa783 1085 'repost_count': int,
cf605226 1086 },
6014355c 1087 'skip': 'Protected tweet',
92315c03 1088 }, {
a006ce2b 1089 # retweeted_status
1090 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1091 'info_dict': {
a006ce2b 1092 'id': '1694928337846538240',
92315c03 1093 'ext': 'mp4',
a006ce2b 1094 'display_id': '1695424220702888009',
1095 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1096 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1097 'channel_id': '15212187',
a006ce2b 1098 'uploader': 'Benny Johnson',
1099 'uploader_id': 'bennyjohnson',
1100 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1101 'age_limit': 0,
1102 'tags': [],
a006ce2b 1103 'duration': 45.001,
1104 'timestamp': 1692962814.0,
1105 'upload_date': '20230825',
1106 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1107 'like_count': int,
92315c03 1108 'repost_count': int,
1109 'comment_count': int,
1c54a98e 1110 '_old_archive_ids': ['twitter 1695424220702888009'],
92315c03 1111 },
a006ce2b 1112 }, {
1113 # retweeted_status w/ legacy API
1114 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1115 'info_dict': {
1116 'id': '1694928337846538240',
1117 'ext': 'mp4',
1118 'display_id': '1695424220702888009',
1119 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1120 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1121 'channel_id': '15212187',
a006ce2b 1122 'uploader': 'Benny Johnson',
1123 'uploader_id': 'bennyjohnson',
1124 'uploader_url': 'https://twitter.com/bennyjohnson',
1125 'age_limit': 0,
1126 'tags': [],
1127 'duration': 45.001,
1128 'timestamp': 1692962814.0,
1129 'upload_date': '20230825',
1130 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1131 'like_count': int,
1132 'repost_count': int,
1c54a98e 1133 '_old_archive_ids': ['twitter 1695424220702888009'],
a006ce2b 1134 },
1135 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1136 }, {
1137 # Broadcast embedded in tweet
1c54a98e 1138 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
a006ce2b 1139 'info_dict': {
1c54a98e 1140 'id': '1rmxPMjLzAXKN',
a006ce2b 1141 'ext': 'mp4',
1c54a98e 1142 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
a006ce2b 1143 'uploader': 'Jessica Dobson',
1c54a98e 1144 'uploader_id': 'JessicaDobsonWX',
1145 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1146 'timestamp': 1701566398,
1147 'upload_date': '20231203',
1148 'live_status': 'was_live',
1149 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1150 'concurrent_view_count': int,
a006ce2b 1151 'view_count': int,
1152 },
1153 'add_ie': ['TwitterBroadcast'],
1154 }, {
55f18333 1155 # Animated gif and quote tweet video
a006ce2b 1156 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1157 'playlist_mincount': 2,
1158 'info_dict': {
1159 'id': '1696256659889565950',
1160 'title': 'BAKOON - https://t.co/zom968d0a0',
1161 'description': 'https://t.co/zom968d0a0',
1162 'tags': [],
55f18333 1163 'channel_id': '1263540390',
a006ce2b 1164 'uploader': 'BAKOON',
1165 'uploader_id': 'BAKKOOONN',
1166 'uploader_url': 'https://twitter.com/BAKKOOONN',
1167 'age_limit': 18,
1168 'timestamp': 1693254077.0,
1169 'upload_date': '20230828',
1170 'like_count': int,
55f18333 1171 'comment_count': int,
1172 'repost_count': int,
a006ce2b 1173 },
55f18333 1174 'skip': 'Requires authentication',
1c54a98e 1175 }, {
1176 # "stale tweet" with typename "TweetWithVisibilityResults"
1177 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
55f18333 1178 'md5': '511377ff8dfa7545307084dca4dce319',
1c54a98e 1179 'info_dict': {
1180 'id': '1724883339285544960',
1181 'ext': 'mp4',
1182 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1183 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1184 'display_id': '1724884212803834154',
55f18333 1185 'channel_id': '337808606',
1c54a98e 1186 'uploader': 'Robert F. Kennedy Jr',
1187 'uploader_id': 'RobertKennedyJr',
1188 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1189 'upload_date': '20231115',
1190 'timestamp': 1700079417.0,
1191 'duration': 341.048,
1192 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1193 'tags': ['Kennedy24'],
1194 'repost_count': int,
1195 'like_count': int,
1196 'comment_count': int,
1197 'age_limit': 0,
1198 '_old_archive_ids': ['twitter 1724884212803834154'],
1199 },
4813173e 1200 }, {
1201 # x.com
1202 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1203 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1204 'info_dict': {
1205 'id': '1790637589910654976',
1206 'ext': 'mp4',
1207 'title': 'Historic Vids - One of the most intense moments in history',
1208 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1209 'display_id': '1790637656616943991',
1210 'uploader': 'Historic Vids',
1211 'uploader_id': 'historyinmemes',
1212 'uploader_url': 'https://twitter.com/historyinmemes',
1213 'channel_id': '855481986290524160',
1214 'upload_date': '20240515',
1215 'timestamp': 1715756260.0,
1216 'duration': 15.488,
1217 'tags': [],
1218 'comment_count': int,
1219 'repost_count': int,
1220 'like_count': int,
1221 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1222 'age_limit': 0,
1223 '_old_archive_ids': ['twitter 1790637656616943991'],
1224 }
82fb2357 1225 }, {
1226 # onion route
1227 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1228 'only_matching': True,
18ca61c5
RA
1229 }, {
1230 # Twitch Clip Embed
1231 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1232 'only_matching': True,
10a5091e
RA
1233 }, {
1234 # promo_video_website card
1235 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1236 'only_matching': True,
00dd0cd5 1237 }, {
1238 # promo_video_convo card
1239 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1240 'only_matching': True,
1241 }, {
1242 # appplayer card
1243 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1244 'only_matching': True,
30a074c2 1245 }, {
1246 # video_direct_message card
1247 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1248 'only_matching': True,
1249 }, {
1250 # poll2choice_video card
1251 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1252 'only_matching': True,
1253 }, {
1254 # poll3choice_video card
1255 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1256 'only_matching': True,
1257 }, {
1258 # poll4choice_video card
1259 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1260 'only_matching': True,
cf5881fc 1261 }]
f57f84f6 1262
# Captures the run of digits between `_video/` and the next `/`; applied to video variant
# URLs of syndication results to recover a per-media id (see `_call_syndication_api`)
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1264
1265 @property
1266 def _GRAPHQL_ENDPOINT(self):
1267 if self.is_logged_in:
1268 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1269 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1270
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-API-style status dict.

    @param data  GraphQL payload; the tweet result is looked up under
                 `threaded_conversation_with_injections_v2` when logged in,
                 and under `tweetResult` otherwise
    @param twid  ID of the requested tweet (selects the matching timeline entry)
    @returns     The result's `legacy` dict, extended with `user`, `card`,
                 `quoted_status` and `retweeted_status` where present
    @raises ExtractorError  for tombstoned or unavailable tweets
    """
    if self.is_logged_in:
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    extra_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweeted_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweeted_user or {}

    # Flatten the card's list of {key, value} items into a single mapping
    binding_values = {}
    for item in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[item.get('key')] = item.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1318
1319 def _build_graphql_query(self, media_id):
1320 return {
1321 'variables': {
1322 'focalTweetId': media_id,
1323 'includePromotedContent': True,
1324 'with_rux_injections': False,
1325 'withBirdwatchNotes': True,
1326 'withCommunity': True,
1327 'withDownvotePerspective': False,
1328 'withQuickPromoteEligibilityTweetFields': True,
1329 'withReactionsMetadata': False,
1330 'withReactionsPerspective': False,
1331 'withSuperFollowsTweetFields': True,
1332 'withSuperFollowsUserFields': True,
1333 'withV2Timeline': True,
1334 'withVoice': True,
1335 },
1336 'features': {
1337 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1338 'interactive_text_enabled': True,
1339 'responsive_web_edit_tweet_api_enabled': True,
1340 'responsive_web_enhance_cards_enabled': True,
1341 'responsive_web_graphql_timeline_navigation_enabled': False,
1342 'responsive_web_text_conversations_enabled': False,
1343 'responsive_web_uc_gql_enabled': True,
1344 'standardized_nudges_misinfo': True,
1345 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1346 'tweetypie_unmention_optimization_enabled': True,
1347 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1348 'verified_phone_label_enabled': False,
1349 'vibe_api_enabled': True,
1350 },
92315c03 1351 } if self.is_logged_in else {
1352 'variables': {
1353 'tweetId': media_id,
1354 'withCommunity': False,
1355 'includePromotedContent': False,
1356 'withVoice': False,
1357 },
1358 'features': {
1359 'creator_subscriptions_tweet_preview_api_enabled': True,
1360 'tweetypie_unmention_optimization_enabled': True,
1361 'responsive_web_edit_tweet_api_enabled': True,
1362 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1363 'view_counts_everywhere_api_enabled': True,
1364 'longform_notetweets_consumption_enabled': True,
1365 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1366 'tweet_awards_web_tipping_enabled': False,
1367 'freedom_of_speech_not_reach_fetch_enabled': True,
1368 'standardized_nudges_misinfo': True,
1369 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1370 'longform_notetweets_rich_text_read_enabled': True,
1371 'longform_notetweets_inline_media_enabled': True,
1372 'responsive_web_graphql_exclude_directive_enabled': True,
1373 'verified_phone_label_enabled': False,
1374 'responsive_web_media_download_video_enabled': False,
1375 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1376 'responsive_web_graphql_timeline_navigation_enabled': True,
1377 'responsive_web_enhance_cards_enabled': False
1378 },
1379 'fieldToggles': {
1380 'withArticleRichContentState': False
1381 }
7a26ce26
SS
1382 }
1383
def _call_syndication_api(self, twid):
    """Fetch a tweet from the syndication endpoint and reshape it like a legacy status.

    Every media detail (of the tweet itself and of its `quoted_tweet`) gets an
    `id_str` taken from its video variant URL, falling back to the tweet ID,
    and the collected details are stored under `extended_entities.media`.

    @param twid  ID of the tweet to fetch
    @returns     The syndication JSON dict, modified in place
    @raises ExtractorError  if the endpoint returns an empty response
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media = list(traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})))
    for detail in media:
        variant_media_id = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
        detail['id_str'] = variant_media_id or twid
    status['extended_entities'] = {'media': media}

    return status
6014355c 1405
def _extract_status(self, twid):
    """Fetch the status dict for a tweet through the selected API.

    GraphQL is used when logged in or when selected; `legacy` uses the
    `statuses/show` endpoint. An HTTP 429 from either falls back to the
    syndication endpoint, which is also used when explicitly selected.

    @param twid  ID of the tweet to fetch
    @returns     The `retweeted_status` dict if the status has one, else the
                 status itself; `{}` if neither is a non-empty dict
    @raises ExtractorError  for an invalid API selection, or for any API
                            error other than HTTP 429
    """
    api = self._selected_api
    if api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif api == 'legacy':
            status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            })
    except ExtractorError as e:
        rate_limited = isinstance(e.cause, HTTPError) and e.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1431
def _real_extract(self, url):
    """Extract the media attached to, quoted by, or embedded as a card in a tweet.

    @param url  Tweet URL; may end in a media number (the `index` group of
                the URL pattern), which selects a single video when a
                playlist is not wanted
    @returns    One of:
                - a single video info dict (one entry, or a URL-selected video),
                - a playlist result when several entries were found,
                - a URL result for the tweet's first expanded URL when the
                  tweet has no media of its own,
                - the bare tweet metadata, if `raise_no_formats` returns
    @raises ExtractorError  if the URL-selected media does not exist or is
                            not a video
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Tweet-level metadata shared by every entry extracted below
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        # Build the per-media fields (id, formats, subtitles, thumbnails, ...) of one media object
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        # Generator yielding the entries (URL results or format dicts) described by a tweet card
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # A binding value stores its payload under '<type>_value', e.g. 'string_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # All non-photo media of the tweet itself and of the tweet it quotes
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # URL numbering is 1-based. A number below 1 would turn into a negative list
        # index and silently wrap around to the last media item, so only look up
        # non-negative positions and report anything else as unavailable.
        media_position = int(selected_index) - 1
        desired_obj = None
        if media_position >= 0:
            desired_obj = traverse_obj(status, (
                (None, 'quoted_status'), 'extended_entities', 'media', media_position, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media of its own: hand the first linked URL over, unless it points back to this tweet
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    # The first entry also records the tweet ID as an old archive id
    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1615
1616
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Twitter Amplify videos served from amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the Amplify page and build the info dict from its meta tags.

        The VMAP URL, thumbnail and dimensions are all read from the
        ``twitter:*`` meta tags of the downloaded webpage.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Returns (width, height) from the `twitter:<target>:*` meta tags;
            # either element is None when the tag is absent or not numeric.
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list (previously an IndexError) and
        # do not clobber dimensions the format already carries with None.
        if formats:
            for key, value in (('width', video_w), ('height', video_h)):
                if value is not None:
                    formats[0][key] = value

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1672
1673
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for Twitter broadcast URLs (`/i/broadcasts/<id>`)."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Fetch broadcast metadata and, unless it is upcoming, its HLS formats."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast API response take precedence over the
        # parsed defaults for title and uploader_id.
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(
                broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            # Upcoming broadcasts are returned without formats.
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's `type` query parameter (if any) becomes the format id.
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1749
1750
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Twitter Spaces URLs (`/i/spaces/<id>`)."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased `state` from the Space metadata to a live_status value.
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` payload for the AudioSpaceById query."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Resolve a Space's metadata and, when available, its audio formats.

        Requires a logged-in session; reports "no formats" for Spaces that
        have not started, have replay disabled, or are not downloadable yet.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # try_call wraps the lookup of a missing or unrecognised state.
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not is_live and not metadata.get('is_space_available_for_replay'):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among noRedirectPlaybackUrl / location, in that order.
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            # Spaces are audio-only; replays additionally get the m4a_dash container.
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1889
1890
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolver for t.co short links and the internal `tco:<code>` scheme."""
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever URL it redirects to."""
        mobj = self._match_valid_url(url)
        # `shortcode` rather than `id`, to avoid shadowing the builtin.
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # `tco:<code>` form: rebuild the real t.co URL from the code.
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url

        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the verified leading prefix; `str.replace` would also
            # remove any later occurrence inside the target URL itself.
            # NOTE(review): the remainder may be percent-encoded - confirm
            # against a live warning redirect before adding unquoting.
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)