# Source page: jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
# Commit subject: [ie/twitter] Prioritize m3u8 formats (#8826)
# Path: [yt-dlp.git] / yt_dlp / extractor / twitter.py
# Blame table columns: Commit | Line | Data
7a26ce26 1import json
a006ce2b 2import random
23e7cba8
S
3import re
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
a006ce2b 7from ..compat import functools # isort: split
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
23e7cba8 13from ..utils import (
2edfd745 14 ExtractorError,
13b2ae29 15 dict_get,
92315c03 16 filter_dict,
23e7cba8 17 float_or_none,
13b2ae29 18 format_field,
cf5881fc 19 int_or_none,
13b2ae29 20 make_archive_id,
147e62fc 21 remove_end,
13b2ae29
SS
22 str_or_none,
23 strip_or_none,
f1150b9e 24 traverse_obj,
7a26ce26 25 try_call,
2edfd745 26 try_get,
18ca61c5
RA
27 unified_timestamp,
28 update_url_query,
41d1cca3 29 url_or_none,
2edfd745 30 xpath_text,
23e7cba8
S
31)
32
33
class TwitterBaseIE(InfoExtractor):
    """Shared base for the Twitter extractors.

    Holds the API endpoints and bearer tokens, implements the multi-step
    login flow against ``onboarding/task.json``, and provides helpers for
    calling the legacy (1.1) and GraphQL APIs and for turning media variants
    into format dicts.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header; see _set_base_headers
    # for when each one is selected.
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Flow token returned by the most recent login API response; it is sent
    # back with every subsequent login subtask request.
    _flow_token = None

    # Request body for the first login API call (flow_name=login).
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        ``variant`` is a mapping with a ``url`` key and optionally a
        ``bitrate``/``bit_rate`` key. Returns a ``(formats, subtitles)``
        tuple; both are empty when the variant has no URL.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # Scale of 1000 is applied to the reported bitrate
            # (presumably bps -> kbps; confirm against API payloads).
            # A result of 0 is normalised to None.
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract formats from it.

        Returns a ``(formats, subtitles)`` tuple; both are empty when
        ``vmap_url`` is not a valid URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            variant_url = video_variant.attrib.get('url')
            if not variant_url:
                # A variant without a URL yields no format; skip it instead
                # of failing the whole extraction with a KeyError.
                continue
            # The attribute is stored URL-quoted; write the decoded value
            # back because the attrib mapping is passed on as the variant.
            variant_url = compat_urllib_parse_unquote(variant_url)
            video_variant.attrib['url'] = variant_url
            urls.append(variant_url)
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # The MediaFile URL is only added when no variant already covers it.
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` from a ``/<W>x<H>/`` URL segment.

        ``a_format`` is updated in place and left untouched when the URL
        contains no such segment.
        """
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an ``auth_token`` cookie is present for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        """First value of the ``api`` extractor argument (default ``graphql``)."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from ``guest/activate.json``.

        Raises ExtractorError when the response carries no string
        ``guest_token``.
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Return the base request headers.

        The legacy bearer token is used only when ``legacy`` is truthy and
        the session is not logged in. ``x-csrf-token`` is taken from the
        ``ct0`` cookie and omitted when that cookie is unavailable.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow and return the next subtask ID.

        Stores the ``flow_token`` from the response on the instance for the
        following step. Raises ExtractorError when the API reports an error,
        the status is not ``success`` or no subtask ID is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering login subtasks until an auth cookie is set.

        Does nothing when already logged in. Raises on captcha, rejected
        logins, unrecognized subtasks, or when the flow reports success
        without the session becoming logged in.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page; otherwise request one.
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token stored by
            # the previous _call_login_api call.
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Reaching this branch means the loop condition still sees
                # no auth_token cookie although the flow reported success.
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Call the legacy or GraphQL API and return the decoded JSON.

        Logged-in sessions send session headers; otherwise a fresh guest
        token is fetched. Raises a login-required error when the API
        reports a "not authorized" error, and ExtractorError for any other
        reported error.
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        # These statuses are accepted so that the JSON error body can be
        # inspected below instead of failing on the HTTP error.
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys drops duplicate messages while keeping the
            # order in which the API reported them, so the resulting
            # message is deterministic.
            errors = ', '.join(dict.fromkeys(
                traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query data for ``media_id``; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the ``data`` member of the response.

        Each value produced by ``_build_graphql_query`` is serialised to
        compact JSON and sent as a query-string parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
332
333
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card and embedded-video URLs.

    It performs no extraction itself: the numeric ID from the URL is used
    to build a ``statuses`` URL which is delegated to ``TwitterIE``.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'}
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the matched ID over to ``TwitterIE`` as a statuses URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 458
03879ff0 459
18ca61c5 460class TwitterIE(TwitterBaseIE):
014e8803 461 IE_NAME = 'twitter'
b6795fd3 462 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 463
cf5881fc 464 _TESTS = [{
48aae2d2 465 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 466 'info_dict': {
13b2ae29
SS
467 'id': '643211870443208704',
468 'display_id': '643211948184596480',
f57f84f6 469 'ext': 'mp4',
575036b4 470 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 471 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 472 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
473 'uploader': 'FREE THE NIPPLE',
474 'uploader_id': 'freethenipple',
3b65a6fb 475 'duration': 12.922,
18ca61c5
RA
476 'timestamp': 1442188653,
477 'upload_date': '20150913',
13b2ae29 478 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 479 'comment_count': int,
480 'repost_count': int,
13b2ae29
SS
481 'like_count': int,
482 'tags': [],
483 'age_limit': 18,
1c54a98e 484 '_old_archive_ids': ['twitter 643211948184596480'],
f57f84f6 485 },
cf5881fc
YCH
486 }, {
487 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
488 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
489 'info_dict': {
490 'id': '657991469417025536',
491 'ext': 'mp4',
492 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
493 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 494 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
495 'uploader': 'Gifs',
496 'uploader_id': 'giphz',
497 },
7efc1c2b 498 'expected_warnings': ['height', 'width'],
fc0a45fa 499 'skip': 'Account suspended',
b703ebee
JMF
500 }, {
501 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
502 'info_dict': {
503 'id': '665052190608723968',
13b2ae29 504 'display_id': '665052190608723968',
b703ebee 505 'ext': 'mp4',
b6795fd3 506 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 507 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 508 'uploader_id': 'starwars',
7a26ce26 509 'uploader': r're:Star Wars.*',
18ca61c5
RA
510 'timestamp': 1447395772,
511 'upload_date': '20151113',
13b2ae29 512 'uploader_url': 'https://twitter.com/starwars',
b03fa783 513 'comment_count': int,
514 'repost_count': int,
13b2ae29
SS
515 'like_count': int,
516 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
517 'age_limit': 0,
1c54a98e 518 '_old_archive_ids': ['twitter 665052190608723968'],
b703ebee 519 },
0ae937a7
YCH
520 }, {
521 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
522 'info_dict': {
523 'id': '705235433198714880',
524 'ext': 'mp4',
18ca61c5
RA
525 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
526 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
527 'uploader_id': 'BTNBrentYarina',
528 'uploader': 'Brent Yarina',
18ca61c5
RA
529 'timestamp': 1456976204,
530 'upload_date': '20160303',
13b2ae29
SS
531 'uploader_url': 'https://twitter.com/BTNBrentYarina',
532 'comment_count': int,
533 'repost_count': int,
534 'like_count': int,
535 'tags': [],
536 'age_limit': 0,
0ae937a7
YCH
537 },
538 'params': {
539 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
540 # Test case of TwitterCardIE
541 'skip_download': True,
542 },
352e7d98 543 'skip': 'Dead external link',
03879ff0
YCH
544 }, {
545 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 546 'info_dict': {
13b2ae29
SS
547 'id': '700207414000242688',
548 'display_id': '700207533655363584',
03879ff0 549 'ext': 'mp4',
13b2ae29 550 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 551 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 552 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
553 'uploader': 'jaydin donte geer',
554 'uploader_id': 'jaydingeer',
3b65a6fb 555 'duration': 30.0,
18ca61c5
RA
556 'timestamp': 1455777459,
557 'upload_date': '20160218',
13b2ae29 558 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 559 'comment_count': int,
560 'repost_count': int,
13b2ae29
SS
561 'like_count': int,
562 'tags': ['Damndaniel'],
563 'age_limit': 0,
1c54a98e 564 '_old_archive_ids': ['twitter 700207533655363584'],
03879ff0 565 },
395fd4b0
YCH
566 }, {
567 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
568 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
569 'info_dict': {
570 'id': 'MIOxnrUteUd',
571 'ext': 'mp4',
18ca61c5
RA
572 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
573 'uploader': 'TAKUMA',
574 'uploader_id': '1004126642786242560',
3615bfe1 575 'timestamp': 1402826626,
395fd4b0 576 'upload_date': '20140615',
13b2ae29
SS
577 'thumbnail': r're:^https?://.*\.jpg',
578 'alt_title': 'Vine by TAKUMA',
579 'comment_count': int,
580 'repost_count': int,
581 'like_count': int,
582 'view_count': int,
395fd4b0
YCH
583 },
584 'add_ie': ['Vine'],
36b7d9db
YCH
585 }, {
586 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 587 'info_dict': {
13b2ae29
SS
588 'id': '717462543795523584',
589 'display_id': '719944021058060289',
36b7d9db
YCH
590 'ext': 'mp4',
591 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
592 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
593 'uploader_id': 'CaptainAmerica',
36b7d9db 594 'uploader': 'Captain America',
3b65a6fb 595 'duration': 3.17,
18ca61c5
RA
596 'timestamp': 1460483005,
597 'upload_date': '20160412',
13b2ae29
SS
598 'uploader_url': 'https://twitter.com/CaptainAmerica',
599 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 600 'comment_count': int,
601 'repost_count': int,
13b2ae29
SS
602 'like_count': int,
603 'tags': [],
604 'age_limit': 0,
1c54a98e 605 '_old_archive_ids': ['twitter 719944021058060289'],
36b7d9db 606 },
f0bc5a86
YCH
607 }, {
608 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
609 'info_dict': {
610 'id': '1zqKVVlkqLaKB',
611 'ext': 'mp4',
18ca61c5 612 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 613 'upload_date': '20160923',
18ca61c5
RA
614 'uploader_id': '1PmKqpJdOJQoY',
615 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 616 'timestamp': 1474613214,
13b2ae29 617 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
618 },
619 'add_ie': ['Periscope'],
1c54a98e 620 'skip': 'Broadcast not found',
2edfd745
YCH
621 }, {
622 # has mp4 formats via mobile API
623 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
624 'info_dict': {
6014355c 625 'id': '852077943283097602',
2edfd745
YCH
626 'ext': 'mp4',
627 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 628 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
629 'uploader': 'عالم الأخبار',
630 'uploader_id': 'news_al3alm',
3b65a6fb 631 'duration': 277.4,
18ca61c5
RA
632 'timestamp': 1492000653,
633 'upload_date': '20170412',
6014355c 634 'display_id': '852138619213144067',
635 'age_limit': 0,
636 'uploader_url': 'https://twitter.com/news_al3alm',
637 'thumbnail': r're:^https?://.*\.jpg',
638 'tags': [],
639 'repost_count': int,
6014355c 640 'like_count': int,
641 'comment_count': int,
1c54a98e 642 '_old_archive_ids': ['twitter 852138619213144067'],
2edfd745 643 },
5c1452e8
GF
644 }, {
645 'url': 'https://twitter.com/i/web/status/910031516746514432',
646 'info_dict': {
13b2ae29
SS
647 'id': '910030238373089285',
648 'display_id': '910031516746514432',
5c1452e8
GF
649 'ext': 'mp4',
650 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
651 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 652 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
653 'uploader': 'Préfet de Guadeloupe',
654 'uploader_id': 'Prefet971',
655 'duration': 47.48,
18ca61c5
RA
656 'timestamp': 1505803395,
657 'upload_date': '20170919',
13b2ae29 658 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 659 'comment_count': int,
660 'repost_count': int,
13b2ae29
SS
661 'like_count': int,
662 'tags': ['Maria'],
663 'age_limit': 0,
1c54a98e 664 '_old_archive_ids': ['twitter 910031516746514432'],
5c1452e8
GF
665 },
666 'params': {
667 'skip_download': True, # requires ffmpeg
668 },
2593725a
S
669 }, {
670 # card via api.twitter.com/1.1/videos/tweet/config
671 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
672 'info_dict': {
13b2ae29
SS
673 'id': '1001551417340022785',
674 'display_id': '1001551623938805763',
2593725a
S
675 'ext': 'mp4',
676 'title': 're:.*?Shep is on a roll today.*?',
677 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 678 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
679 'uploader': 'Lis Power',
680 'uploader_id': 'LisPower1',
681 'duration': 111.278,
18ca61c5
RA
682 'timestamp': 1527623489,
683 'upload_date': '20180529',
13b2ae29 684 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 685 'comment_count': int,
686 'repost_count': int,
13b2ae29
SS
687 'like_count': int,
688 'tags': [],
689 'age_limit': 0,
1c54a98e 690 '_old_archive_ids': ['twitter 1001551623938805763'],
2593725a
S
691 },
692 'params': {
693 'skip_download': True, # requires ffmpeg
694 },
b7ef93f0
S
695 }, {
696 'url': 'https://twitter.com/foobar/status/1087791357756956680',
697 'info_dict': {
13b2ae29
SS
698 'id': '1087791272830607360',
699 'display_id': '1087791357756956680',
b7ef93f0 700 'ext': 'mp4',
6014355c 701 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 702 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 703 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 704 'uploader': 'X',
705 'uploader_id': 'X',
b7ef93f0 706 'duration': 61.567,
18ca61c5
RA
707 'timestamp': 1548184644,
708 'upload_date': '20190122',
6014355c 709 'uploader_url': 'https://twitter.com/X',
b03fa783 710 'comment_count': int,
711 'repost_count': int,
13b2ae29 712 'like_count': int,
b03fa783 713 'view_count': int,
13b2ae29
SS
714 'tags': [],
715 'age_limit': 0,
18ca61c5 716 },
a006ce2b 717 'skip': 'This Tweet is unavailable',
18ca61c5
RA
718 }, {
719 # not available in Periscope
720 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
721 'info_dict': {
722 'id': '1vOGwqejwoWxB',
723 'ext': 'mp4',
724 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
725 'uploader': 'Vivi',
726 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
727 'thumbnail': r're:^https?://.*\.jpg',
728 'tags': ['EduTECH2019'],
729 'view_count': int,
b7ef93f0 730 },
18ca61c5 731 'add_ie': ['TwitterBroadcast'],
a006ce2b 732 'skip': 'Broadcast no longer exists',
30a074c2 733 }, {
734 # unified card
735 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
736 'info_dict': {
13b2ae29
SS
737 'id': '1349774757969989634',
738 'display_id': '1349794411333394432',
30a074c2 739 'ext': 'mp4',
740 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
741 'thumbnail': r're:^https?://.*\.jpg',
742 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
743 'uploader': 'Brooklyn Nets',
744 'uploader_id': 'BrooklynNets',
745 'duration': 324.484,
746 'timestamp': 1610651040,
747 'upload_date': '20210114',
13b2ae29 748 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 749 'comment_count': int,
750 'repost_count': int,
13b2ae29
SS
751 'like_count': int,
752 'tags': [],
753 'age_limit': 0,
1c54a98e 754 '_old_archive_ids': ['twitter 1349794411333394432'],
30a074c2 755 },
756 'params': {
757 'skip_download': True,
758 },
13b2ae29
SS
759 }, {
760 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
761 'info_dict': {
762 'id': '1577855447914409984',
763 'display_id': '1577855540407197696',
764 'ext': 'mp4',
352e7d98 765 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
766 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 767 'upload_date': '20221006',
352e7d98 768 'uploader': 'oshtru',
13b2ae29
SS
769 'uploader_id': 'oshtru',
770 'uploader_url': 'https://twitter.com/oshtru',
771 'thumbnail': r're:^https?://.*\.jpg',
772 'duration': 30.03,
7a26ce26 773 'timestamp': 1665025050,
b03fa783 774 'comment_count': int,
775 'repost_count': int,
13b2ae29
SS
776 'like_count': int,
777 'tags': [],
778 'age_limit': 0,
1c54a98e 779 '_old_archive_ids': ['twitter 1577855540407197696'],
13b2ae29
SS
780 },
781 'params': {'skip_download': True},
782 }, {
783 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
784 'info_dict': {
785 'id': '1577719286659006464',
1c54a98e 786 'title': 'Ultima - Test',
13b2ae29 787 'description': 'Test https://t.co/Y3KEZD7Dad',
1c54a98e 788 'uploader': 'Ultima',
13b2ae29
SS
789 'uploader_id': 'UltimaShadowX',
790 'uploader_url': 'https://twitter.com/UltimaShadowX',
791 'upload_date': '20221005',
7a26ce26 792 'timestamp': 1664992565,
b03fa783 793 'comment_count': int,
794 'repost_count': int,
13b2ae29
SS
795 'like_count': int,
796 'tags': [],
797 'age_limit': 0,
798 },
799 'playlist_count': 4,
800 'params': {'skip_download': True},
7a26ce26
SS
801 }, {
802 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
803 'info_dict': {
804 'id': '1575559336759263233',
805 'display_id': '1575560063510810624',
806 'ext': 'mp4',
807 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
808 'thumbnail': r're:^https?://.*\.jpg',
809 'description': 'md5:95aea692fda36a12081b9629b02daa92',
810 'uploader': 'Max Olson',
811 'uploader_id': 'MesoMax919',
812 'uploader_url': 'https://twitter.com/MesoMax919',
813 'duration': 21.321,
814 'timestamp': 1664477766,
815 'upload_date': '20220929',
b03fa783 816 'comment_count': int,
817 'repost_count': int,
7a26ce26
SS
818 'like_count': int,
819 'tags': ['HurricaneIan'],
820 'age_limit': 0,
1c54a98e 821 '_old_archive_ids': ['twitter 1575560063510810624'],
7a26ce26
SS
822 },
823 }, {
a006ce2b 824 # Adult content, fails if not logged in
7a26ce26
SS
825 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
826 'info_dict': {
827 'id': '1575199163847000068',
828 'display_id': '1575199173472927762',
829 'ext': 'mp4',
830 'title': str,
831 'description': str,
832 'uploader': str,
833 'uploader_id': 'Rizdraws',
834 'uploader_url': 'https://twitter.com/Rizdraws',
835 'upload_date': '20220928',
836 'timestamp': 1664391723,
16bed382 837 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
838 'like_count': int,
839 'repost_count': int,
840 'comment_count': int,
841 'age_limit': 18,
842 'tags': []
843 },
a006ce2b 844 'params': {'skip_download': 'The media could not be played'},
147e62fc 845 'skip': 'Requires authentication',
7a26ce26 846 }, {
a006ce2b 847 # Playlist result only with graphql API
7a26ce26
SS
848 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
849 'playlist_mincount': 2,
850 'info_dict': {
851 'id': '1395079556562706435',
852 'title': str,
853 'tags': [],
854 'uploader': str,
855 'like_count': int,
856 'upload_date': '20210519',
857 'age_limit': 0,
858 'repost_count': int,
147e62fc 859 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
860 'uploader_id': 'Srirachachau',
861 'comment_count': int,
862 'uploader_url': 'https://twitter.com/Srirachachau',
863 'timestamp': 1621447860,
864 },
865 }, {
7a26ce26
SS
866 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
867 'playlist_mincount': 2,
868 'info_dict': {
869 'id': '1578353380363501568',
870 'title': str,
871 'uploader_id': 'DavidToons_',
872 'repost_count': int,
873 'like_count': int,
874 'uploader': str,
875 'timestamp': 1665143744,
876 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 877 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
878 'tags': [],
879 'comment_count': int,
880 'upload_date': '20221007',
881 'age_limit': 0,
882 },
883 }, {
884 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
885 'playlist_count': 2,
886 'info_dict': {
887 'id': '1578401165338976258',
888 'title': str,
889 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
890 'uploader': str,
891 'uploader_id': 'primevideouk',
892 'timestamp': 1665155137,
893 'upload_date': '20221007',
894 'age_limit': 0,
895 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 896 'comment_count': int,
897 'repost_count': int,
7a26ce26
SS
898 'like_count': int,
899 'tags': ['TheRingsOfPower'],
900 },
901 }, {
902 # Twitter Spaces
903 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
904 'info_dict': {
905 'id': '1lPJqmBeeNAJb',
906 'ext': 'm4a',
907 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
908 'uploader': r're:Monique Camarra.+?',
909 'uploader_id': 'MoniqueCamarra',
910 'live_status': 'was_live',
1c16d9df 911 'release_timestamp': 1658417414,
a006ce2b 912 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 913 'timestamp': 1658407771,
914 'release_date': '20220721',
915 'upload_date': '20220721',
7a26ce26
SS
916 },
917 'add_ie': ['TwitterSpaces'],
918 'params': {'skip_download': 'm3u8'},
92315c03 919 'skip': 'Requires authentication',
16bed382 920 }, {
921 # URL specifies video number but --yes-playlist
922 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
923 'playlist_mincount': 2,
924 'info_dict': {
925 'id': '1600649710662213632',
926 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
927 'timestamp': 1670459604.0,
928 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 929 'comment_count': int,
16bed382 930 'uploader_id': 'CTVJLaidlaw',
b03fa783 931 'repost_count': int,
16bed382 932 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
933 'upload_date': '20221208',
934 'age_limit': 0,
935 'uploader': 'Jocelyn Laidlaw',
936 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
937 'like_count': int,
938 },
939 }, {
940 # URL specifies video number and --no-playlist
941 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
942 'info_dict': {
943 'id': '1600649511827013632',
944 'ext': 'mp4',
147e62fc 945 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 946 'thumbnail': r're:^https?://.+\.jpg',
947 'timestamp': 1670459604.0,
948 'uploader_id': 'CTVJLaidlaw',
949 'uploader': 'Jocelyn Laidlaw',
b03fa783 950 'repost_count': int,
951 'comment_count': int,
16bed382 952 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
953 'duration': 102.226,
954 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
955 'display_id': '1600649710662213632',
956 'like_count': int,
957 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
958 'upload_date': '20221208',
959 'age_limit': 0,
1c54a98e 960 '_old_archive_ids': ['twitter 1600649710662213632'],
16bed382 961 },
962 'params': {'noplaylist': True},
7543c9c9 963 }, {
964 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
965 # note the id different between extraction and url
966 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
967 'info_dict': {
968 'id': '1621117577354424321',
969 'display_id': '1621117700482416640',
970 'ext': 'mp4',
971 'title': '뽀 - 아 최우제 이동속도 봐',
972 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
973 'duration': 24.598,
974 'uploader': '뽀',
975 'uploader_id': 's2FAKER',
976 'uploader_url': 'https://twitter.com/s2FAKER',
977 'upload_date': '20230202',
978 'timestamp': 1675339553.0,
979 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
980 'age_limit': 18,
981 'tags': [],
982 'like_count': int,
b03fa783 983 'repost_count': int,
984 'comment_count': int,
1c54a98e 985 '_old_archive_ids': ['twitter 1621117700482416640'],
7543c9c9 986 },
b6795fd3
SS
987 }, {
988 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
989 'info_dict': {
990 'id': '1599108643743473680',
991 'display_id': '1599108751385972737',
992 'ext': 'mp4',
993 'title': '\u06ea - \U0001F48B',
994 'uploader_url': 'https://twitter.com/hlo_again',
995 'like_count': int,
996 'uploader_id': 'hlo_again',
997 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 998 'repost_count': int,
b6795fd3 999 'duration': 9.531,
b03fa783 1000 'comment_count': int,
b6795fd3
SS
1001 'upload_date': '20221203',
1002 'age_limit': 0,
1003 'timestamp': 1670092210.0,
1004 'tags': [],
1005 'uploader': '\u06ea',
1006 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1c54a98e 1007 '_old_archive_ids': ['twitter 1599108751385972737'],
b6795fd3
SS
1008 },
1009 'params': {'noplaylist': True},
1010 }, {
b6795fd3
SS
1011 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1012 'info_dict': {
1013 'id': '1600009362759733248',
1014 'display_id': '1600009574919962625',
1015 'ext': 'mp4',
1016 'uploader_url': 'https://twitter.com/MunTheShinobi',
1017 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
1018 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1019 'age_limit': 0,
a006ce2b 1020 'uploader': 'Mün',
b03fa783 1021 'repost_count': int,
b6795fd3 1022 'upload_date': '20221206',
a006ce2b 1023 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1024 'comment_count': int,
b6795fd3
SS
1025 'like_count': int,
1026 'tags': [],
1027 'uploader_id': 'MunTheShinobi',
1028 'duration': 139.987,
1029 'timestamp': 1670306984.0,
1c54a98e 1030 '_old_archive_ids': ['twitter 1600009574919962625'],
b6795fd3 1031 },
cf605226 1032 }, {
a006ce2b 1033 # retweeted_status (private)
cf605226 1034 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1035 'info_dict': {
1036 'id': '1623274794488659969',
1037 'display_id': '1623739803874349067',
1038 'ext': 'mp4',
1039 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1040 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1041 'uploader': 'Johnny Bullets',
1042 'uploader_id': 'Johnnybull3ts',
1043 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1044 'age_limit': 0,
1045 'tags': [],
1046 'duration': 8.033,
1047 'timestamp': 1675853859.0,
1048 'upload_date': '20230208',
1049 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1050 'like_count': int,
b03fa783 1051 'repost_count': int,
cf605226 1052 },
6014355c 1053 'skip': 'Protected tweet',
92315c03 1054 }, {
a006ce2b 1055 # retweeted_status
1056 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1057 'info_dict': {
a006ce2b 1058 'id': '1694928337846538240',
92315c03 1059 'ext': 'mp4',
a006ce2b 1060 'display_id': '1695424220702888009',
1061 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1062 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1063 'uploader': 'Benny Johnson',
1064 'uploader_id': 'bennyjohnson',
1065 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1066 'age_limit': 0,
1067 'tags': [],
a006ce2b 1068 'duration': 45.001,
1069 'timestamp': 1692962814.0,
1070 'upload_date': '20230825',
1071 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1072 'like_count': int,
92315c03 1073 'repost_count': int,
1074 'comment_count': int,
1c54a98e 1075 '_old_archive_ids': ['twitter 1695424220702888009'],
92315c03 1076 },
a006ce2b 1077 }, {
1078 # retweeted_status w/ legacy API
1079 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1080 'info_dict': {
1081 'id': '1694928337846538240',
1082 'ext': 'mp4',
1083 'display_id': '1695424220702888009',
1084 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1085 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1086 'uploader': 'Benny Johnson',
1087 'uploader_id': 'bennyjohnson',
1088 'uploader_url': 'https://twitter.com/bennyjohnson',
1089 'age_limit': 0,
1090 'tags': [],
1091 'duration': 45.001,
1092 'timestamp': 1692962814.0,
1093 'upload_date': '20230825',
1094 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1095 'like_count': int,
1096 'repost_count': int,
1c54a98e 1097 '_old_archive_ids': ['twitter 1695424220702888009'],
a006ce2b 1098 },
1099 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1100 }, {
1101 # Broadcast embedded in tweet
1c54a98e 1102 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
a006ce2b 1103 'info_dict': {
1c54a98e 1104 'id': '1rmxPMjLzAXKN',
a006ce2b 1105 'ext': 'mp4',
1c54a98e 1106 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
a006ce2b 1107 'uploader': 'Jessica Dobson',
1c54a98e 1108 'uploader_id': 'JessicaDobsonWX',
1109 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1110 'timestamp': 1701566398,
1111 'upload_date': '20231203',
1112 'live_status': 'was_live',
1113 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1114 'concurrent_view_count': int,
a006ce2b 1115 'view_count': int,
1116 },
1117 'add_ie': ['TwitterBroadcast'],
1118 }, {
1119 # Animated gif and quote tweet video, with syndication API
1120 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1121 'playlist_mincount': 2,
1122 'info_dict': {
1123 'id': '1696256659889565950',
1124 'title': 'BAKOON - https://t.co/zom968d0a0',
1125 'description': 'https://t.co/zom968d0a0',
1126 'tags': [],
1127 'uploader': 'BAKOON',
1128 'uploader_id': 'BAKKOOONN',
1129 'uploader_url': 'https://twitter.com/BAKKOOONN',
1130 'age_limit': 18,
1131 'timestamp': 1693254077.0,
1132 'upload_date': '20230828',
1133 'like_count': int,
1134 },
1135 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
1136 'expected_warnings': ['Not all metadata'],
1c54a98e 1137 }, {
1138 # "stale tweet" with typename "TweetWithVisibilityResults"
1139 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1140 'md5': '62b1e11cdc2cdd0e527f83adb081f536',
1141 'info_dict': {
1142 'id': '1724883339285544960',
1143 'ext': 'mp4',
1144 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1145 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1146 'display_id': '1724884212803834154',
1147 'uploader': 'Robert F. Kennedy Jr',
1148 'uploader_id': 'RobertKennedyJr',
1149 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1150 'upload_date': '20231115',
1151 'timestamp': 1700079417.0,
1152 'duration': 341.048,
1153 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1154 'tags': ['Kennedy24'],
1155 'repost_count': int,
1156 'like_count': int,
1157 'comment_count': int,
1158 'age_limit': 0,
1159 '_old_archive_ids': ['twitter 1724884212803834154'],
1160 },
82fb2357 1161 }, {
1162 # onion route
1163 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1164 'only_matching': True,
18ca61c5
RA
1165 }, {
1166 # Twitch Clip Embed
1167 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1168 'only_matching': True,
10a5091e
RA
1169 }, {
1170 # promo_video_website card
1171 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1172 'only_matching': True,
00dd0cd5 1173 }, {
1174 # promo_video_convo card
1175 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1176 'only_matching': True,
1177 }, {
1178 # appplayer card
1179 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1180 'only_matching': True,
30a074c2 1181 }, {
1182 # video_direct_message card
1183 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1184 'only_matching': True,
1185 }, {
1186 # poll2choice_video card
1187 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1188 'only_matching': True,
1189 }, {
1190 # poll3choice_video card
1191 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1192 'only_matching': True,
1193 }, {
1194 # poll4choice_video card
1195 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1196 'only_matching': True,
cf5881fc 1197 }]
f57f84f6 1198
# Matches "_video/<digits>/" inside a URL and captures the digits.
# _extract_status applies it to syndication video variant URLs to derive a media id.
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1200
1201 @property
1202 def _GRAPHQL_ENDPOINT(self):
1203 if self.is_logged_in:
1204 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1205 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1206
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-API-style status dict.

    @param data  GraphQL response data for the tweet
    @param twid  id of the requested tweet
    @returns     the tweet's 'legacy' dict, augmented with 'user', 'card',
                 'quoted_status' and 'retweeted_status' entries where present
    @raises ExtractorError  for tombstoned or unavailable tweets
    """
    if self.is_logged_in:
        # Logged-in responses hold a list of entries; select the one for this tweet
        tweet_result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        tweet_result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = tweet_result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet_result:
        cause = remove_end(traverse_obj(tweet_result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = tweet_result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        tweet_result = traverse_obj(tweet_result, ('tweet', {dict})) or {}

    status = tweet_result.get('legacy', {})
    related = traverse_obj(tweet_result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(related)

    # extra transformations needed since result does not match legacy format
    retweet = status.get('retweeted_status')
    if retweet:
        retweet['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Turn the card's list of key/value items into a plain mapping
    card_bindings = {
        item.get('key'): item.get('value')
        for item in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
1254
1255 def _build_graphql_query(self, media_id):
1256 return {
1257 'variables': {
1258 'focalTweetId': media_id,
1259 'includePromotedContent': True,
1260 'with_rux_injections': False,
1261 'withBirdwatchNotes': True,
1262 'withCommunity': True,
1263 'withDownvotePerspective': False,
1264 'withQuickPromoteEligibilityTweetFields': True,
1265 'withReactionsMetadata': False,
1266 'withReactionsPerspective': False,
1267 'withSuperFollowsTweetFields': True,
1268 'withSuperFollowsUserFields': True,
1269 'withV2Timeline': True,
1270 'withVoice': True,
1271 },
1272 'features': {
1273 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1274 'interactive_text_enabled': True,
1275 'responsive_web_edit_tweet_api_enabled': True,
1276 'responsive_web_enhance_cards_enabled': True,
1277 'responsive_web_graphql_timeline_navigation_enabled': False,
1278 'responsive_web_text_conversations_enabled': False,
1279 'responsive_web_uc_gql_enabled': True,
1280 'standardized_nudges_misinfo': True,
1281 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1282 'tweetypie_unmention_optimization_enabled': True,
1283 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1284 'verified_phone_label_enabled': False,
1285 'vibe_api_enabled': True,
1286 },
92315c03 1287 } if self.is_logged_in else {
1288 'variables': {
1289 'tweetId': media_id,
1290 'withCommunity': False,
1291 'includePromotedContent': False,
1292 'withVoice': False,
1293 },
1294 'features': {
1295 'creator_subscriptions_tweet_preview_api_enabled': True,
1296 'tweetypie_unmention_optimization_enabled': True,
1297 'responsive_web_edit_tweet_api_enabled': True,
1298 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1299 'view_counts_everywhere_api_enabled': True,
1300 'longform_notetweets_consumption_enabled': True,
1301 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1302 'tweet_awards_web_tipping_enabled': False,
1303 'freedom_of_speech_not_reach_fetch_enabled': True,
1304 'standardized_nudges_misinfo': True,
1305 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1306 'longform_notetweets_rich_text_read_enabled': True,
1307 'longform_notetweets_inline_media_enabled': True,
1308 'responsive_web_graphql_exclude_directive_enabled': True,
1309 'verified_phone_label_enabled': False,
1310 'responsive_web_media_download_video_enabled': False,
1311 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1312 'responsive_web_graphql_timeline_navigation_enabled': True,
1313 'responsive_web_enhance_cards_enabled': False
1314 },
1315 'fieldToggles': {
1316 'withArticleRichContentState': False
1317 }
7a26ce26
SS
1318 }
1319
def _extract_status(self, twid):
    """Fetch the status dict for tweet *twid* through the selected API.

    GraphQL is always used when logged in; otherwise self._selected_api picks
    between 'graphql', 'legacy' and 'syndication'. Syndication results are
    reshaped so that media appear under 'extended_entities' like in the other
    formats. If the status carries a 'retweeted_status' dict, that is what gets
    returned instead of the status itself.

    @raises ExtractorError  on an empty syndication response or an invalid API selection
    """
    if self.is_logged_in or self._selected_api == 'graphql':
        graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
        status = self._graphql_to_legacy(graphql_data, twid)

    elif self._selected_api == 'legacy':
        legacy_query = {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }
        status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)

    elif self._selected_api == 'syndication':
        self.report_warning(
            'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
        # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
        token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
        status = self._download_json(
            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
            headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
        if not status:
            raise ExtractorError('Syndication endpoint returned empty JSON response')

        # Transform the result so its structure matches that of legacy/graphql
        media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
        media = []
        for detail in media_details:
            # Take the media id from a variant URL, falling back to the tweet id
            variant_media_id = traverse_obj(detail, (
                'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
            detail['id_str'] = variant_media_id or twid
            media.append(detail)
        status['extended_entities'] = {'media': media}

    else:
        raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1357
def _real_extract(self, url):
    """Extract the video(s) attached to a tweet.

    Collects tweet-level metadata, then gathers media from the tweet's (and its
    quoted tweet's) 'extended_entities' and from its card. Returns a single
    info dict, a playlist, or a URL result for the first expanded link when the
    tweet has no media of its own.

    When the URL names a media index and the playlist is declined, only that
    media item is extracted.

    @raises ExtractorError  if the URL-specified media item is missing or is not a video
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-media fields (formats, subtitles, thumbnails, ...) from a media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield the entries described by a tweet card; yields nothing for a missing card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # URL indices are 1-based. Anything below 1 would turn into a negative list
        # index and silently wrap around to the last media item, so reject it instead.
        media_index = int(selected_index) - 1
        desired_obj = None if media_index < 0 else traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media in the tweet: fall back to the first link it contains, if any
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1540
1541
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for amp.twimg.com video pages, which reference their video through a VMAP URL."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and the thumbnail from the page's twitter:* meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            """Return (width, height) from the twitter:<target>:width/height meta tags."""
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Only annotate when a format exists: indexing an empty list would raise a
        # bare IndexError instead of letting the caller report missing formats
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1597
1598
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``twitter.com/i/broadcasts/<id>`` pages."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a broadcast URL.

        Looks the broadcast up via ``broadcasts/show.json``, overlays the
        Twitter-specific title/uploader fields on the parsed broadcast data,
        and (unless the broadcast is still upcoming) resolves the stream URL
        through ``live_video_stream/status/<media_key>`` to fill in formats.

        Raises ExtractorError when the API returns an empty broadcast entry.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        data = api_response['broadcasts'][broadcast_id]
        if not data:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(data, broadcast_id)
        # Values reported by Twitter take precedence over the parsed defaults
        username = data.get('twitter_username')
        info['title'] = data.get('status') or info.get('title')
        info['uploader_id'] = username or info.get('uploader_id')
        info['uploader_url'] = format_field(
            data, 'twitter_username', 'https://twitter.com/%s', default=None)

        # Upcoming broadcasts are returned without any formats
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = data['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The format id comes from the playlist URL's "type" query parameter, if present
        query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(data)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1674
1675
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``twitter.com/i/spaces/<id>`` audio Spaces."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased Space "state" from the API to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL query payload (variables + features) for *space_id*."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Build the info dict for a Twitter Space URL.

        Requires a logged-in session. Fetches the Space via the
        ``AudioSpaceById`` GraphQL query, derives the live status from its
        metadata and, when a media key is present and the Space is live or
        replayable, extracts audio-only m3u8 formats.

        Raises ExtractorError when the API returns no ``audioSpace`` data.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Missing or unrecognised states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate keys, in this order
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            # Spaces are audio-only; non-live formats are additionally tagged as m4a_dash
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        uploader = traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'name'))
        uploader_id = traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'screen_name'))
        # scheduled_start and created_at are divided by 1000 (scale=1000)
        release_timestamp = try_call(
            lambda: int_or_none(metadata['scheduled_start'], scale=1000))
        timestamp = int_or_none(metadata.get('created_at'), scale=1000)

        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': uploader,
            'uploader_id': uploader_id,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
            'timestamp': timestamp,
            'formats': formats,
            'http_headers': headers,
        }
1814
1815
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (and ``tco:<id>`` references) to their target URL."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to ``url_result``.

        A ``tco:<id>`` input is first rewritten to ``https://t.co/<id>``.
        If the final URL is Twitter's unsafe-link warning page, the warning
        prefix is dropped so the wrapped link is returned instead.
        """
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the curl User-Agent presumably makes t.co answer with a
        # plain HTTP redirect - confirm before changing it
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading prefix; a replace() would also remove any
            # later occurrence of the same text inside the wrapped link
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)