]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[ie/twitter] Extract bitrate for HLS audio formats (#9257)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
a006ce2b 2import random
23e7cba8
S
3import re
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
a006ce2b 7from ..compat import functools # isort: split
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
116c2684 13from ..networking.exceptions import HTTPError
23e7cba8 14from ..utils import (
2edfd745 15 ExtractorError,
13b2ae29 16 dict_get,
92315c03 17 filter_dict,
23e7cba8 18 float_or_none,
13b2ae29 19 format_field,
cf5881fc 20 int_or_none,
13b2ae29 21 make_archive_id,
147e62fc 22 remove_end,
13b2ae29
SS
23 str_or_none,
24 strip_or_none,
f1150b9e 25 traverse_obj,
7a26ce26 26 try_call,
2edfd745 27 try_get,
18ca61c5
RA
28 unified_timestamp,
29 update_url_query,
41d1cca3 30 url_or_none,
2edfd745 31 xpath_text,
23e7cba8
S
32)
33
34
class TwitterBaseIE(InfoExtractor):
    """Base class with the API, login and format helpers used by the Twitter extractors.

    It bundles the endpoint/bearer-token constants, guest-token retrieval,
    the step-by-step login flow against ``onboarding/task.json``, generic
    REST/GraphQL API calls, and the conversion of media variants into
    format dicts.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer token sent by default (see _set_base_headers)
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    # Bearer token sent only for legacy API requests made while not logged in
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api after every login step and echoed back in the next one
    _flow_token = None

    # Request body of the first login call (the one made with flow_name=login)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Turn a single media variant into ``(formats, subtitles)``.

        @param variant   Mapping with a ``url`` key and optionally
                         ``bitrate``/``bit_rate``
        @param video_id  ID passed on to the m3u8 downloader
        @returns         ``([], {})`` when the variant has no URL, the HLS
                         formats and subtitles for ``.m3u8`` URLs, otherwise a
                         single direct HTTP format and no subtitles
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only HLS formats without a bitrate: recover it from a
            # format_id such as "hls-audio-128000" (value is scaled by 1000)
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs
        else:
            # `or None` turns a bitrate of 0 into "unknown"
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': f'http-{tbr}' if tbr else 'http',
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and extract ``(formats, subtitles)`` from it.

        Every ``videoVariant`` element is processed; the ``MediaFile`` URL is
        added as well unless it was already seen among the variants.
        Returns ``([], {})`` when `vmap_url` is not a valid URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded; decode it in place so the
            # attribute mapping can be handed to _extract_variant_formats
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # Skip the extra lookup when there is no MediaFile URL at all; a
        # missing URL would yield no formats anyway
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on `a_format` if `video_url` contains a ``/<W>x<H>/`` path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an ``auth_token`` cookie is present for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        """Value of the ``api`` extractor argument of the ``Twitter`` extractor (default ``graphql``)."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from ``guest/activate.json``.

        @param display_id  ID used for progress messages; when falsy, the
                           legacy bearer token is never used for the request
        @raises ExtractorError  if the response carries no string ``guest_token``
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Build the ``Authorization`` and (if the ``ct0`` cookie exists) ``x-csrf-token`` headers.

        The legacy bearer token is only chosen when `legacy` is truthy and no
        account is logged in.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow and return the ID of the next subtask.

        Stores the ``flow_token`` of the response in ``self._flow_token``.

        @param note     Progress message
        @param headers  HTTP headers for the request
        @param query    Optional URL query parameters
        @param data     Optional request body (bytes)
        @raises ExtractorError  if the API reports an error, the status is not
                                ``success`` or no next subtask is returned
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering the subtasks requested by the login API one by one.

        Does nothing when an ``auth_token`` cookie already exists. The loop
        runs until that cookie appears; captcha/denied subtasks ask the user
        to log in with cookies instead, and unknown subtasks raise
        ExtractorError.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page; otherwise request one
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token of the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # The flow claims success but the loop condition shows no auth cookie was set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy REST API or, with `graphql`, the GraphQL API and return the parsed JSON.

        Session headers are sent when logged in; otherwise a fresh guest
        token is fetched for the request.

        @param path      Endpoint path appended to the API base URL
        @param video_id  ID used for progress messages
        @param query     Optional URL query parameters
        @param graphql   Use ``_GRAPHQL_API_BASE`` instead of ``_API_BASE``
        @raises ExtractorError  if the response contains ``errors``; a
                                login-required error is raised instead when
                                the messages contain "not authorized"
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        # These HTTP statuses are passed as expected so that the JSON error
        # body can be inspected below
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys removes duplicate messages while keeping the order
            # in which the API reported them, so the error text is deterministic
            errors = ', '.join(dict.fromkeys(traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for `media_id`; must be overridden to use GraphQL."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the ``data`` member of the response.

        Every value returned by `_build_graphql_query` is JSON-encoded and
        sent as a URL query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
337
338
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card and ``i/videos`` URLs.

    It performs no extraction itself: the numeric ID in the URL is turned
    into a ``/statuses/<id>`` URL that is handed over to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect `url` to TwitterIE using the status URL built from the matched ID."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 463
03879ff0 464
18ca61c5 465class TwitterIE(TwitterBaseIE):
014e8803 466 IE_NAME = 'twitter'
b6795fd3 467 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 468
cf5881fc 469 _TESTS = [{
48aae2d2 470 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 471 'info_dict': {
13b2ae29
SS
472 'id': '643211870443208704',
473 'display_id': '643211948184596480',
f57f84f6 474 'ext': 'mp4',
575036b4 475 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 476 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 477 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
478 'uploader': 'FREE THE NIPPLE',
479 'uploader_id': 'freethenipple',
3b65a6fb 480 'duration': 12.922,
18ca61c5
RA
481 'timestamp': 1442188653,
482 'upload_date': '20150913',
13b2ae29 483 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 484 'comment_count': int,
485 'repost_count': int,
13b2ae29
SS
486 'like_count': int,
487 'tags': [],
488 'age_limit': 18,
1c54a98e 489 '_old_archive_ids': ['twitter 643211948184596480'],
f57f84f6 490 },
cf5881fc
YCH
491 }, {
492 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
493 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
494 'info_dict': {
495 'id': '657991469417025536',
496 'ext': 'mp4',
497 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
498 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 499 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
500 'uploader': 'Gifs',
501 'uploader_id': 'giphz',
502 },
7efc1c2b 503 'expected_warnings': ['height', 'width'],
fc0a45fa 504 'skip': 'Account suspended',
b703ebee
JMF
505 }, {
506 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
507 'info_dict': {
508 'id': '665052190608723968',
13b2ae29 509 'display_id': '665052190608723968',
b703ebee 510 'ext': 'mp4',
b6795fd3 511 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 512 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 513 'uploader_id': 'starwars',
7a26ce26 514 'uploader': r're:Star Wars.*',
18ca61c5
RA
515 'timestamp': 1447395772,
516 'upload_date': '20151113',
13b2ae29 517 'uploader_url': 'https://twitter.com/starwars',
b03fa783 518 'comment_count': int,
519 'repost_count': int,
13b2ae29
SS
520 'like_count': int,
521 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
522 'age_limit': 0,
1c54a98e 523 '_old_archive_ids': ['twitter 665052190608723968'],
b703ebee 524 },
0ae937a7
YCH
525 }, {
526 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
527 'info_dict': {
528 'id': '705235433198714880',
529 'ext': 'mp4',
18ca61c5
RA
530 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
531 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
532 'uploader_id': 'BTNBrentYarina',
533 'uploader': 'Brent Yarina',
18ca61c5
RA
534 'timestamp': 1456976204,
535 'upload_date': '20160303',
13b2ae29
SS
536 'uploader_url': 'https://twitter.com/BTNBrentYarina',
537 'comment_count': int,
538 'repost_count': int,
539 'like_count': int,
540 'tags': [],
541 'age_limit': 0,
0ae937a7
YCH
542 },
543 'params': {
544 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
545 # Test case of TwitterCardIE
546 'skip_download': True,
547 },
352e7d98 548 'skip': 'Dead external link',
03879ff0
YCH
549 }, {
550 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 551 'info_dict': {
13b2ae29
SS
552 'id': '700207414000242688',
553 'display_id': '700207533655363584',
03879ff0 554 'ext': 'mp4',
13b2ae29 555 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 556 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 557 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
558 'uploader': 'jaydin donte geer',
559 'uploader_id': 'jaydingeer',
3b65a6fb 560 'duration': 30.0,
18ca61c5
RA
561 'timestamp': 1455777459,
562 'upload_date': '20160218',
13b2ae29 563 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 564 'comment_count': int,
565 'repost_count': int,
13b2ae29
SS
566 'like_count': int,
567 'tags': ['Damndaniel'],
568 'age_limit': 0,
1c54a98e 569 '_old_archive_ids': ['twitter 700207533655363584'],
03879ff0 570 },
395fd4b0
YCH
571 }, {
572 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
573 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
574 'info_dict': {
575 'id': 'MIOxnrUteUd',
576 'ext': 'mp4',
18ca61c5
RA
577 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
578 'uploader': 'TAKUMA',
579 'uploader_id': '1004126642786242560',
3615bfe1 580 'timestamp': 1402826626,
395fd4b0 581 'upload_date': '20140615',
13b2ae29
SS
582 'thumbnail': r're:^https?://.*\.jpg',
583 'alt_title': 'Vine by TAKUMA',
584 'comment_count': int,
585 'repost_count': int,
586 'like_count': int,
587 'view_count': int,
395fd4b0
YCH
588 },
589 'add_ie': ['Vine'],
36b7d9db
YCH
590 }, {
591 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 592 'info_dict': {
13b2ae29
SS
593 'id': '717462543795523584',
594 'display_id': '719944021058060289',
36b7d9db
YCH
595 'ext': 'mp4',
596 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
597 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
598 'uploader_id': 'CaptainAmerica',
36b7d9db 599 'uploader': 'Captain America',
3b65a6fb 600 'duration': 3.17,
18ca61c5
RA
601 'timestamp': 1460483005,
602 'upload_date': '20160412',
13b2ae29
SS
603 'uploader_url': 'https://twitter.com/CaptainAmerica',
604 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 605 'comment_count': int,
606 'repost_count': int,
13b2ae29
SS
607 'like_count': int,
608 'tags': [],
609 'age_limit': 0,
1c54a98e 610 '_old_archive_ids': ['twitter 719944021058060289'],
36b7d9db 611 },
f0bc5a86
YCH
612 }, {
613 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
614 'info_dict': {
615 'id': '1zqKVVlkqLaKB',
616 'ext': 'mp4',
18ca61c5 617 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 618 'upload_date': '20160923',
18ca61c5
RA
619 'uploader_id': '1PmKqpJdOJQoY',
620 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 621 'timestamp': 1474613214,
13b2ae29 622 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
623 },
624 'add_ie': ['Periscope'],
1c54a98e 625 'skip': 'Broadcast not found',
2edfd745
YCH
626 }, {
627 # has mp4 formats via mobile API
628 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
629 'info_dict': {
6014355c 630 'id': '852077943283097602',
2edfd745
YCH
631 'ext': 'mp4',
632 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 633 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
634 'uploader': 'عالم الأخبار',
635 'uploader_id': 'news_al3alm',
3b65a6fb 636 'duration': 277.4,
18ca61c5
RA
637 'timestamp': 1492000653,
638 'upload_date': '20170412',
6014355c 639 'display_id': '852138619213144067',
640 'age_limit': 0,
641 'uploader_url': 'https://twitter.com/news_al3alm',
642 'thumbnail': r're:^https?://.*\.jpg',
643 'tags': [],
644 'repost_count': int,
6014355c 645 'like_count': int,
646 'comment_count': int,
1c54a98e 647 '_old_archive_ids': ['twitter 852138619213144067'],
2edfd745 648 },
5c1452e8
GF
649 }, {
650 'url': 'https://twitter.com/i/web/status/910031516746514432',
651 'info_dict': {
13b2ae29
SS
652 'id': '910030238373089285',
653 'display_id': '910031516746514432',
5c1452e8
GF
654 'ext': 'mp4',
655 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
656 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 657 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
658 'uploader': 'Préfet de Guadeloupe',
659 'uploader_id': 'Prefet971',
660 'duration': 47.48,
18ca61c5
RA
661 'timestamp': 1505803395,
662 'upload_date': '20170919',
13b2ae29 663 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 664 'comment_count': int,
665 'repost_count': int,
13b2ae29
SS
666 'like_count': int,
667 'tags': ['Maria'],
668 'age_limit': 0,
1c54a98e 669 '_old_archive_ids': ['twitter 910031516746514432'],
5c1452e8
GF
670 },
671 'params': {
672 'skip_download': True, # requires ffmpeg
673 },
2593725a
S
674 }, {
675 # card via api.twitter.com/1.1/videos/tweet/config
676 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
677 'info_dict': {
13b2ae29
SS
678 'id': '1001551417340022785',
679 'display_id': '1001551623938805763',
2593725a
S
680 'ext': 'mp4',
681 'title': 're:.*?Shep is on a roll today.*?',
682 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 683 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
684 'uploader': 'Lis Power',
685 'uploader_id': 'LisPower1',
686 'duration': 111.278,
18ca61c5
RA
687 'timestamp': 1527623489,
688 'upload_date': '20180529',
13b2ae29 689 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 690 'comment_count': int,
691 'repost_count': int,
13b2ae29
SS
692 'like_count': int,
693 'tags': [],
694 'age_limit': 0,
1c54a98e 695 '_old_archive_ids': ['twitter 1001551623938805763'],
2593725a
S
696 },
697 'params': {
698 'skip_download': True, # requires ffmpeg
699 },
b7ef93f0
S
700 }, {
701 'url': 'https://twitter.com/foobar/status/1087791357756956680',
702 'info_dict': {
13b2ae29
SS
703 'id': '1087791272830607360',
704 'display_id': '1087791357756956680',
b7ef93f0 705 'ext': 'mp4',
6014355c 706 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 707 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 708 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 709 'uploader': 'X',
710 'uploader_id': 'X',
b7ef93f0 711 'duration': 61.567,
18ca61c5
RA
712 'timestamp': 1548184644,
713 'upload_date': '20190122',
6014355c 714 'uploader_url': 'https://twitter.com/X',
b03fa783 715 'comment_count': int,
716 'repost_count': int,
13b2ae29 717 'like_count': int,
b03fa783 718 'view_count': int,
13b2ae29
SS
719 'tags': [],
720 'age_limit': 0,
18ca61c5 721 },
a006ce2b 722 'skip': 'This Tweet is unavailable',
18ca61c5
RA
723 }, {
724 # not available in Periscope
725 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
726 'info_dict': {
727 'id': '1vOGwqejwoWxB',
728 'ext': 'mp4',
729 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
730 'uploader': 'Vivi',
731 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
732 'thumbnail': r're:^https?://.*\.jpg',
733 'tags': ['EduTECH2019'],
734 'view_count': int,
b7ef93f0 735 },
18ca61c5 736 'add_ie': ['TwitterBroadcast'],
a006ce2b 737 'skip': 'Broadcast no longer exists',
30a074c2 738 }, {
739 # unified card
740 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
741 'info_dict': {
13b2ae29
SS
742 'id': '1349774757969989634',
743 'display_id': '1349794411333394432',
30a074c2 744 'ext': 'mp4',
745 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
746 'thumbnail': r're:^https?://.*\.jpg',
747 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
748 'uploader': 'Brooklyn Nets',
749 'uploader_id': 'BrooklynNets',
750 'duration': 324.484,
751 'timestamp': 1610651040,
752 'upload_date': '20210114',
13b2ae29 753 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 754 'comment_count': int,
755 'repost_count': int,
13b2ae29
SS
756 'like_count': int,
757 'tags': [],
758 'age_limit': 0,
1c54a98e 759 '_old_archive_ids': ['twitter 1349794411333394432'],
30a074c2 760 },
761 'params': {
762 'skip_download': True,
763 },
13b2ae29
SS
764 }, {
765 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
766 'info_dict': {
767 'id': '1577855447914409984',
768 'display_id': '1577855540407197696',
769 'ext': 'mp4',
352e7d98 770 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
771 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 772 'upload_date': '20221006',
352e7d98 773 'uploader': 'oshtru',
13b2ae29
SS
774 'uploader_id': 'oshtru',
775 'uploader_url': 'https://twitter.com/oshtru',
776 'thumbnail': r're:^https?://.*\.jpg',
777 'duration': 30.03,
7a26ce26 778 'timestamp': 1665025050,
b03fa783 779 'comment_count': int,
780 'repost_count': int,
13b2ae29
SS
781 'like_count': int,
782 'tags': [],
783 'age_limit': 0,
1c54a98e 784 '_old_archive_ids': ['twitter 1577855540407197696'],
13b2ae29
SS
785 },
786 'params': {'skip_download': True},
787 }, {
788 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
789 'info_dict': {
790 'id': '1577719286659006464',
1c54a98e 791 'title': 'Ultima - Test',
13b2ae29 792 'description': 'Test https://t.co/Y3KEZD7Dad',
1c54a98e 793 'uploader': 'Ultima',
13b2ae29
SS
794 'uploader_id': 'UltimaShadowX',
795 'uploader_url': 'https://twitter.com/UltimaShadowX',
796 'upload_date': '20221005',
7a26ce26 797 'timestamp': 1664992565,
b03fa783 798 'comment_count': int,
799 'repost_count': int,
13b2ae29
SS
800 'like_count': int,
801 'tags': [],
802 'age_limit': 0,
803 },
804 'playlist_count': 4,
805 'params': {'skip_download': True},
7a26ce26
SS
806 }, {
807 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
808 'info_dict': {
809 'id': '1575559336759263233',
810 'display_id': '1575560063510810624',
811 'ext': 'mp4',
812 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
813 'thumbnail': r're:^https?://.*\.jpg',
814 'description': 'md5:95aea692fda36a12081b9629b02daa92',
815 'uploader': 'Max Olson',
816 'uploader_id': 'MesoMax919',
817 'uploader_url': 'https://twitter.com/MesoMax919',
818 'duration': 21.321,
819 'timestamp': 1664477766,
820 'upload_date': '20220929',
b03fa783 821 'comment_count': int,
822 'repost_count': int,
7a26ce26
SS
823 'like_count': int,
824 'tags': ['HurricaneIan'],
825 'age_limit': 0,
1c54a98e 826 '_old_archive_ids': ['twitter 1575560063510810624'],
7a26ce26
SS
827 },
828 }, {
a006ce2b 829 # Adult content, fails if not logged in
7a26ce26
SS
830 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
831 'info_dict': {
832 'id': '1575199163847000068',
833 'display_id': '1575199173472927762',
834 'ext': 'mp4',
835 'title': str,
836 'description': str,
837 'uploader': str,
838 'uploader_id': 'Rizdraws',
839 'uploader_url': 'https://twitter.com/Rizdraws',
840 'upload_date': '20220928',
841 'timestamp': 1664391723,
16bed382 842 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
843 'like_count': int,
844 'repost_count': int,
845 'comment_count': int,
846 'age_limit': 18,
847 'tags': []
848 },
a006ce2b 849 'params': {'skip_download': 'The media could not be played'},
147e62fc 850 'skip': 'Requires authentication',
7a26ce26 851 }, {
a006ce2b 852 # Playlist result only with graphql API
7a26ce26
SS
853 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
854 'playlist_mincount': 2,
855 'info_dict': {
856 'id': '1395079556562706435',
857 'title': str,
858 'tags': [],
859 'uploader': str,
860 'like_count': int,
861 'upload_date': '20210519',
862 'age_limit': 0,
863 'repost_count': int,
147e62fc 864 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
865 'uploader_id': 'Srirachachau',
866 'comment_count': int,
867 'uploader_url': 'https://twitter.com/Srirachachau',
868 'timestamp': 1621447860,
869 },
870 }, {
7a26ce26
SS
871 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
872 'playlist_mincount': 2,
873 'info_dict': {
874 'id': '1578353380363501568',
875 'title': str,
876 'uploader_id': 'DavidToons_',
877 'repost_count': int,
878 'like_count': int,
879 'uploader': str,
880 'timestamp': 1665143744,
881 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 882 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
883 'tags': [],
884 'comment_count': int,
885 'upload_date': '20221007',
886 'age_limit': 0,
887 },
888 }, {
889 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
890 'playlist_count': 2,
891 'info_dict': {
892 'id': '1578401165338976258',
893 'title': str,
894 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
895 'uploader': str,
896 'uploader_id': 'primevideouk',
897 'timestamp': 1665155137,
898 'upload_date': '20221007',
899 'age_limit': 0,
900 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 901 'comment_count': int,
902 'repost_count': int,
7a26ce26
SS
903 'like_count': int,
904 'tags': ['TheRingsOfPower'],
905 },
906 }, {
907 # Twitter Spaces
908 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
909 'info_dict': {
910 'id': '1lPJqmBeeNAJb',
911 'ext': 'm4a',
912 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
913 'uploader': r're:Monique Camarra.+?',
914 'uploader_id': 'MoniqueCamarra',
915 'live_status': 'was_live',
1c16d9df 916 'release_timestamp': 1658417414,
a006ce2b 917 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 918 'timestamp': 1658407771,
919 'release_date': '20220721',
920 'upload_date': '20220721',
7a26ce26
SS
921 },
922 'add_ie': ['TwitterSpaces'],
923 'params': {'skip_download': 'm3u8'},
92315c03 924 'skip': 'Requires authentication',
16bed382 925 }, {
926 # URL specifies video number but --yes-playlist
927 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
928 'playlist_mincount': 2,
929 'info_dict': {
930 'id': '1600649710662213632',
931 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
932 'timestamp': 1670459604.0,
933 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 934 'comment_count': int,
16bed382 935 'uploader_id': 'CTVJLaidlaw',
b03fa783 936 'repost_count': int,
16bed382 937 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
938 'upload_date': '20221208',
939 'age_limit': 0,
940 'uploader': 'Jocelyn Laidlaw',
941 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
942 'like_count': int,
943 },
944 }, {
945 # URL specifies video number and --no-playlist
946 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
947 'info_dict': {
948 'id': '1600649511827013632',
949 'ext': 'mp4',
147e62fc 950 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 951 'thumbnail': r're:^https?://.+\.jpg',
952 'timestamp': 1670459604.0,
953 'uploader_id': 'CTVJLaidlaw',
954 'uploader': 'Jocelyn Laidlaw',
b03fa783 955 'repost_count': int,
956 'comment_count': int,
16bed382 957 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
958 'duration': 102.226,
959 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
960 'display_id': '1600649710662213632',
961 'like_count': int,
962 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
963 'upload_date': '20221208',
964 'age_limit': 0,
1c54a98e 965 '_old_archive_ids': ['twitter 1600649710662213632'],
16bed382 966 },
967 'params': {'noplaylist': True},
7543c9c9 968 }, {
969 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
970 # note the id different between extraction and url
971 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
972 'info_dict': {
973 'id': '1621117577354424321',
974 'display_id': '1621117700482416640',
975 'ext': 'mp4',
976 'title': '뽀 - 아 최우제 이동속도 봐',
977 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
978 'duration': 24.598,
979 'uploader': '뽀',
980 'uploader_id': 's2FAKER',
981 'uploader_url': 'https://twitter.com/s2FAKER',
982 'upload_date': '20230202',
983 'timestamp': 1675339553.0,
984 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
985 'age_limit': 18,
986 'tags': [],
987 'like_count': int,
b03fa783 988 'repost_count': int,
989 'comment_count': int,
1c54a98e 990 '_old_archive_ids': ['twitter 1621117700482416640'],
7543c9c9 991 },
b6795fd3
SS
992 }, {
993 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
994 'info_dict': {
995 'id': '1599108643743473680',
996 'display_id': '1599108751385972737',
997 'ext': 'mp4',
998 'title': '\u06ea - \U0001F48B',
999 'uploader_url': 'https://twitter.com/hlo_again',
1000 'like_count': int,
1001 'uploader_id': 'hlo_again',
1002 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 1003 'repost_count': int,
b6795fd3 1004 'duration': 9.531,
b03fa783 1005 'comment_count': int,
b6795fd3
SS
1006 'upload_date': '20221203',
1007 'age_limit': 0,
1008 'timestamp': 1670092210.0,
1009 'tags': [],
1010 'uploader': '\u06ea',
1011 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1c54a98e 1012 '_old_archive_ids': ['twitter 1599108751385972737'],
b6795fd3
SS
1013 },
1014 'params': {'noplaylist': True},
1015 }, {
b6795fd3
SS
1016 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1017 'info_dict': {
1018 'id': '1600009362759733248',
1019 'display_id': '1600009574919962625',
1020 'ext': 'mp4',
1021 'uploader_url': 'https://twitter.com/MunTheShinobi',
1022 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
1023 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1024 'age_limit': 0,
a006ce2b 1025 'uploader': 'Mün',
b03fa783 1026 'repost_count': int,
b6795fd3 1027 'upload_date': '20221206',
a006ce2b 1028 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1029 'comment_count': int,
b6795fd3
SS
1030 'like_count': int,
1031 'tags': [],
1032 'uploader_id': 'MunTheShinobi',
1033 'duration': 139.987,
1034 'timestamp': 1670306984.0,
1c54a98e 1035 '_old_archive_ids': ['twitter 1600009574919962625'],
b6795fd3 1036 },
cf605226 1037 }, {
a006ce2b 1038 # retweeted_status (private)
cf605226 1039 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1040 'info_dict': {
1041 'id': '1623274794488659969',
1042 'display_id': '1623739803874349067',
1043 'ext': 'mp4',
1044 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1045 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1046 'uploader': 'Johnny Bullets',
1047 'uploader_id': 'Johnnybull3ts',
1048 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1049 'age_limit': 0,
1050 'tags': [],
1051 'duration': 8.033,
1052 'timestamp': 1675853859.0,
1053 'upload_date': '20230208',
1054 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1055 'like_count': int,
b03fa783 1056 'repost_count': int,
cf605226 1057 },
6014355c 1058 'skip': 'Protected tweet',
92315c03 1059 }, {
a006ce2b 1060 # retweeted_status
1061 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1062 'info_dict': {
a006ce2b 1063 'id': '1694928337846538240',
92315c03 1064 'ext': 'mp4',
a006ce2b 1065 'display_id': '1695424220702888009',
1066 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1067 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1068 'uploader': 'Benny Johnson',
1069 'uploader_id': 'bennyjohnson',
1070 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1071 'age_limit': 0,
1072 'tags': [],
a006ce2b 1073 'duration': 45.001,
1074 'timestamp': 1692962814.0,
1075 'upload_date': '20230825',
1076 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1077 'like_count': int,
92315c03 1078 'repost_count': int,
1079 'comment_count': int,
1c54a98e 1080 '_old_archive_ids': ['twitter 1695424220702888009'],
92315c03 1081 },
a006ce2b 1082 }, {
1083 # retweeted_status w/ legacy API
1084 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1085 'info_dict': {
1086 'id': '1694928337846538240',
1087 'ext': 'mp4',
1088 'display_id': '1695424220702888009',
1089 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1090 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1091 'uploader': 'Benny Johnson',
1092 'uploader_id': 'bennyjohnson',
1093 'uploader_url': 'https://twitter.com/bennyjohnson',
1094 'age_limit': 0,
1095 'tags': [],
1096 'duration': 45.001,
1097 'timestamp': 1692962814.0,
1098 'upload_date': '20230825',
1099 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1100 'like_count': int,
1101 'repost_count': int,
1c54a98e 1102 '_old_archive_ids': ['twitter 1695424220702888009'],
a006ce2b 1103 },
1104 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1105 }, {
1106 # Broadcast embedded in tweet
1c54a98e 1107 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
a006ce2b 1108 'info_dict': {
1c54a98e 1109 'id': '1rmxPMjLzAXKN',
a006ce2b 1110 'ext': 'mp4',
1c54a98e 1111 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
a006ce2b 1112 'uploader': 'Jessica Dobson',
1c54a98e 1113 'uploader_id': 'JessicaDobsonWX',
1114 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1115 'timestamp': 1701566398,
1116 'upload_date': '20231203',
1117 'live_status': 'was_live',
1118 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1119 'concurrent_view_count': int,
a006ce2b 1120 'view_count': int,
1121 },
1122 'add_ie': ['TwitterBroadcast'],
1123 }, {
1124 # Animated gif and quote tweet video, with syndication API
1125 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1126 'playlist_mincount': 2,
1127 'info_dict': {
1128 'id': '1696256659889565950',
1129 'title': 'BAKOON - https://t.co/zom968d0a0',
1130 'description': 'https://t.co/zom968d0a0',
1131 'tags': [],
1132 'uploader': 'BAKOON',
1133 'uploader_id': 'BAKKOOONN',
1134 'uploader_url': 'https://twitter.com/BAKKOOONN',
1135 'age_limit': 18,
1136 'timestamp': 1693254077.0,
1137 'upload_date': '20230828',
1138 'like_count': int,
1139 },
1140 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
1141 'expected_warnings': ['Not all metadata'],
1c54a98e 1142 }, {
1143 # "stale tweet" with typename "TweetWithVisibilityResults"
1144 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1145 'md5': '62b1e11cdc2cdd0e527f83adb081f536',
1146 'info_dict': {
1147 'id': '1724883339285544960',
1148 'ext': 'mp4',
1149 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1150 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1151 'display_id': '1724884212803834154',
1152 'uploader': 'Robert F. Kennedy Jr',
1153 'uploader_id': 'RobertKennedyJr',
1154 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1155 'upload_date': '20231115',
1156 'timestamp': 1700079417.0,
1157 'duration': 341.048,
1158 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1159 'tags': ['Kennedy24'],
1160 'repost_count': int,
1161 'like_count': int,
1162 'comment_count': int,
1163 'age_limit': 0,
1164 '_old_archive_ids': ['twitter 1724884212803834154'],
1165 },
82fb2357 1166 }, {
1167 # onion route
1168 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1169 'only_matching': True,
18ca61c5
RA
1170 }, {
1171 # Twitch Clip Embed
1172 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1173 'only_matching': True,
10a5091e
RA
1174 }, {
1175 # promo_video_website card
1176 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1177 'only_matching': True,
00dd0cd5 1178 }, {
1179 # promo_video_convo card
1180 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1181 'only_matching': True,
1182 }, {
1183 # appplayer card
1184 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1185 'only_matching': True,
30a074c2 1186 }, {
1187 # video_direct_message card
1188 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1189 'only_matching': True,
1190 }, {
1191 # poll2choice_video card
1192 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1193 'only_matching': True,
1194 }, {
1195 # poll3choice_video card
1196 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1197 'only_matching': True,
1198 }, {
1199 # poll4choice_video card
1200 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1201 'only_matching': True,
cf5881fc 1202 }]
f57f84f6 1203
# Captures the run of digits between "_video/" and the next "/" in a URL;
# the syndication code applies it to variant URLs to recover a media id
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1205
1206 @property
1207 def _GRAPHQL_ENDPOINT(self):
1208 if self.is_logged_in:
1209 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1210 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1211
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """
    Reshape a GraphQL tweet response into a legacy-style status dict

    @param data     GraphQL payload. When logged in, the tweet is looked up among the
                    'threaded_conversation_with_injections_v2' entries by its
                    'tweet-<twid>' entry id; otherwise it is read from 'tweetResult'
    @param twid     Tweet id, used for the entry lookup and for warnings
    @returns        The tweet's 'legacy' dict with 'user', 'card', 'quoted_status'
                    and 'retweeted_status' merged in where present
    @raises ExtractorError  for tombstoned or unavailable tweets
    """
    if self.is_logged_in:
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    # A tombstone takes precedence over the typename checks below
    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    extra_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweeted_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweeted_user or {}

    # Flatten the card's list of key/value bindings into a single mapping
    card_bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
1259
1260 def _build_graphql_query(self, media_id):
1261 return {
1262 'variables': {
1263 'focalTweetId': media_id,
1264 'includePromotedContent': True,
1265 'with_rux_injections': False,
1266 'withBirdwatchNotes': True,
1267 'withCommunity': True,
1268 'withDownvotePerspective': False,
1269 'withQuickPromoteEligibilityTweetFields': True,
1270 'withReactionsMetadata': False,
1271 'withReactionsPerspective': False,
1272 'withSuperFollowsTweetFields': True,
1273 'withSuperFollowsUserFields': True,
1274 'withV2Timeline': True,
1275 'withVoice': True,
1276 },
1277 'features': {
1278 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1279 'interactive_text_enabled': True,
1280 'responsive_web_edit_tweet_api_enabled': True,
1281 'responsive_web_enhance_cards_enabled': True,
1282 'responsive_web_graphql_timeline_navigation_enabled': False,
1283 'responsive_web_text_conversations_enabled': False,
1284 'responsive_web_uc_gql_enabled': True,
1285 'standardized_nudges_misinfo': True,
1286 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1287 'tweetypie_unmention_optimization_enabled': True,
1288 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1289 'verified_phone_label_enabled': False,
1290 'vibe_api_enabled': True,
1291 },
92315c03 1292 } if self.is_logged_in else {
1293 'variables': {
1294 'tweetId': media_id,
1295 'withCommunity': False,
1296 'includePromotedContent': False,
1297 'withVoice': False,
1298 },
1299 'features': {
1300 'creator_subscriptions_tweet_preview_api_enabled': True,
1301 'tweetypie_unmention_optimization_enabled': True,
1302 'responsive_web_edit_tweet_api_enabled': True,
1303 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1304 'view_counts_everywhere_api_enabled': True,
1305 'longform_notetweets_consumption_enabled': True,
1306 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1307 'tweet_awards_web_tipping_enabled': False,
1308 'freedom_of_speech_not_reach_fetch_enabled': True,
1309 'standardized_nudges_misinfo': True,
1310 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1311 'longform_notetweets_rich_text_read_enabled': True,
1312 'longform_notetweets_inline_media_enabled': True,
1313 'responsive_web_graphql_exclude_directive_enabled': True,
1314 'verified_phone_label_enabled': False,
1315 'responsive_web_media_download_video_enabled': False,
1316 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1317 'responsive_web_graphql_timeline_navigation_enabled': True,
1318 'responsive_web_enhance_cards_enabled': False
1319 },
1320 'fieldToggles': {
1321 'withArticleRichContentState': False
1322 }
7a26ce26
SS
1323 }
1324
def _call_syndication_api(self, twid):
    """
    Fetch a tweet from the syndication endpoint and reshape it like a legacy/graphql status

    @param twid     Tweet id
    @returns        The syndication JSON with 'extended_entities' -> 'media' filled in
    @raises ExtractorError  if the endpoint returns an empty response
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    query = {
        'id': twid,
        # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
        'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
    }
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query=query)
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    def with_media_id(detail):
        # Take the media id from the first variant URL that matches; otherwise use the tweet id
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        return detail

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    status['extended_entities'] = {'media': [with_media_id(detail) for detail in media_details]}

    return status
6014355c 1346
def _extract_status(self, twid):
    """
    Fetch the status dict for a tweet through the selected API

    A rate-limit (HTTP 429) failure of the graphql/legacy call falls back to the
    syndication endpoint. If the status carries a 'retweeted_status' dict, that is
    returned instead of the status itself.

    @param twid     Tweet id
    @returns        status dict (possibly empty)
    @raises ExtractorError  if the selected API is not graphql, legacy or syndication
    """
    api = self._selected_api
    if api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as e:
        rate_limited = isinstance(e.cause, HTTPError) and e.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    # An explicit syndication selection always ends with the syndication result
    if api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1372
def _real_extract(self, url):
    """
    Extract the media of a tweet: attached/quoted videos and card-embedded content

    Returns a single video entry, a playlist of entries, or a URL result pointing to
    the tweet's first expanded URL when the tweet itself carries no media.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    tweet_text = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    description = tweet_text
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', tweet_text)

    user = status.get('user') or {}
    uploader, uploader_id = user.get('name'), user.get('screen_name')
    if uploader:
        title = f'{uploader} - {title}'

    # Tweet-level metadata shared by every entry
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats, subtitles = [], {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            variant_formats, variant_subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, variant_subs)
            formats.extend(variant_formats)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # One thumbnail per advertised size, followed by the original
            named_sizes = [
                *media.get('sizes', {}).items(),
                ('orig', media.get('original_info') or {}),
            ]
            thumbnails = [{
                'id': name,
                'url': update_url_query(media_url, {'name': name}),
                'width': int_or_none(size.get('w') or size.get('width')),
                'height': int_or_none(size.get('h') or size.get('height')),
            } for name, size in named_sizes]

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            binding = binding_values.get(k) or {}
            return try_get(binding, lambda x: x[x['type'].lower() + '_value'])

        def url_entry(target_url, ie=None):
            entry = {'_type': 'url', 'url': target_url}
            if ie is not None:
                entry['ie_key'] = ie.ie_key()
            return entry

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield url_entry(get_binding_value('player_url'))
        elif card_name == 'periscope_broadcast':
            yield url_entry(get_binding_value('url') or get_binding_value('player_url'), PeriscopeIE)
        elif card_name == 'broadcast':
            yield url_entry(get_binding_value('broadcast_url'), TwitterBroadcastIE)
        elif card_name == 'audiospace':
            yield url_entry(f'https://twitter.com/i/spaces/{get_binding_value("id")}', TwitterSpacesIE)
        elif card_name == 'summary':
            yield url_entry(get_binding_value('card_url'))
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            if card_name == 'amplify':
                vmap_key, id_prefix = 'amplify_url_vmap', card_name
            else:
                vmap_key, id_prefix = 'player_stream_url', 'player'
            vmap_url = get_binding_value(vmap_key)
            content_id = get_binding_value(f'{id_prefix}_content_id')
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if image_url and '/player-placeholder' not in image_url:
                    thumbnails.append({
                        'id': suffix[1:] if suffix else 'medium',
                        'url': image_url,
                        'width': int_or_none(image.get('width')),
                        'height': int_or_none(image.get('height')),
                    })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        # Only the media item whose number was given in the URL is wanted
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        if desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        desired_id = desired_obj.get('id')
        video_number = next(
            (index for index, entry in enumerate(videos, 1) if not entry.get('id') != desired_id), None)
        if video_number is not None:
            if video_number == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{video_number}'

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]

    if not entries:
        # Nothing embedded in the tweet itself; hand over to whatever the tweet links to
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for number, entry in enumerate(entries, 1):
        entry['title'] += f' #{number}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1555
1556
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for amp.twimg.com video pages; formats come from the VMAP URL in the page's meta tags
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page, resolve its VMAP URL into formats and attach page-declared dimensions"""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the (width, height) pair declared by the twitter:<target>:* meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing it would raise IndexError.
        # NOTE(review): presumably the VMAP helper can return no formats - confirm
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1612
1613
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` URLs (13-character alphanumeric id)."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the broadcast, then resolve its HLS playlist.

        Raises ExtractorError (expected) when the API returns a falsy entry
        for the broadcast id. Upcoming broadcasts are returned without
        formats.
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values taken from the broadcast entry win over the parsed ones.
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(
                broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's "type" query parameter, when present, is used
        # as the m3u8 format id.
        playlist_query = compat_parse_qs(
            compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1689
1690
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` URLs; produces audio-only formats."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased metadata "state" value -> live_status reported in the result
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for *space_id*."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Fetch a Space's metadata and, when one is available, its audio stream.

        Requires a logged-in session. Raises ExtractorError (expected) when
        the GraphQL response has no ``audioSpace`` payload. When no stream
        can be offered, the reason is reported through ``raise_no_formats``.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        space_data = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # An unknown or missing state leaves live_status as None.
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not is_live and not metadata.get('is_space_available_for_replay'):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(
                f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate keys, in this order.
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}),
                get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8',
                    live=is_live, headers=headers, fatal=False)
            else:
                formats = []
            for fmt in formats:
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # Both timestamps are divided by 1000 (scale=1000).
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1829
1830
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<id>``) to their target URL."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to ``url_result``.

        When the request ends on Twitter's unsafe-link warning page, the
        warning prefix is stripped so that the wrapped target is returned.
        """
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # "tco:<id>" form: rebuild the canonical https://t.co/<id> URL
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the curl User-Agent presumably makes t.co answer with
        # a plain HTTP redirect -- confirm before changing it.
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Slice off the leading prefix only; str.replace would also drop
            # any later occurrence of the same text inside the target URL.
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)