]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import functools
2 import json
3 import random
4 import re
5 import urllib.parse
6
7 from .common import InfoExtractor
8 from .periscope import PeriscopeBaseIE, PeriscopeIE
9 from ..networking.exceptions import HTTPError
10 from ..utils import (
11 ExtractorError,
12 dict_get,
13 filter_dict,
14 float_or_none,
15 format_field,
16 int_or_none,
17 make_archive_id,
18 remove_end,
19 str_or_none,
20 strip_or_none,
21 traverse_obj,
22 try_call,
23 try_get,
24 unified_timestamp,
25 update_url_query,
26 url_or_none,
27 xpath_text,
28 )
29
30
class TwitterBaseIE(InfoExtractor):
    """Shared plumbing for the Twitter/X extractors in this module.

    Holds the API endpoints and bearer tokens, the credential login flow,
    helpers for calling the legacy (1.1) and GraphQL APIs, and helpers that
    turn media variants into format dicts.
    """

    _NETRC_MACHINE = 'twitter'
    # NOTE: both API bases may be rewritten class-wide by _real_initialize()
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Latest flow token handed back by the login API; set by _call_login_api()
    _flow_token = None

    # Request body sent with the first login API call (flow_name=login)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats and subtitles for a single media variant.

        ``variant`` is a mapping that is read for ``url`` and for
        ``bitrate``/``bit_rate``. Returns a ``(formats, subtitles)`` tuple;
        both are empty when the variant carries no URL.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only HLS formats that came back without a bitrate:
            # recover it from the number embedded in the format_id
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs
        else:
            # Direct (non-HLS) URL: a single progressive format
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + (f'-{tbr}' if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract formats/subtitles from it.

        Every ``videoVariant`` element is processed, followed by the
        ``MediaFile`` URL if it was not already among the variants.
        Returns a ``(formats, subtitles)`` tuple; both are empty when
        ``vmap_url`` is not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded in the document
            video_variant.attrib['url'] = urllib.parse.unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            # A missing MediaFile (None) yields no formats from the helper
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` from a ``/<W>x<H>/`` URL path segment, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an ``auth_token`` cookie exists for the current API base."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    # XXX: Temporary workaround until twitter.com => x.com migration is completed
    def _real_initialize(self):
        """Fall back to the twitter.com API hosts when only twitter.com cookies were supplied."""
        if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
            return
        # User has not yet been migrated to x.com and has passed twitter.com cookies
        # NOTE: assigned on the base class on purpose, so every subclass sees the change
        TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
        TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'

    @functools.cached_property
    def _selected_api(self):
        """First value of the ``api`` extractor argument (``ie_key='Twitter'``), defaulting to ``'graphql'``."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from ``guest/activate.json``.

        The legacy bearer token is only used when ``display_id`` is truthy
        and the selected API is ``'legacy'``.

        Raises ExtractorError when the response contains no string
        ``guest_token``.
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Return the ``Authorization`` header plus ``x-csrf-token`` taken from the ``ct0`` cookie.

        The legacy bearer token is used only when ``legacy`` is set and the
        user is not logged in. The dict is passed through ``filter_dict``
        (presumably dropping the CSRF header when there is no ``ct0`` cookie).
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow against ``onboarding/task.json``.

        Stores the returned flow token on ``self._flow_token`` and returns
        the ID of the next subtask.

        Raises ExtractorError when the API reports an error, when the status
        is not ``'success'``, or when no next subtask is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in with credentials by answering login subtasks until logged in.

        Does nothing when already logged in. Raises ExtractorError for an
        unrecognized subtask or when the flow reports success without the
        ``auth_token`` cookie having been set.
        """
        if self.is_logged_in:
            return

        guest_token = self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://x.com/',
            'Origin': 'https://x.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token from the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs,
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by the subtasks that take a single text answer
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link',
                },
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username,
                                    },
                                },
                            }],
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false',
                        },
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # The loop is only still running because no auth_token cookie was set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Call the legacy (1.1) or GraphQL API and return the decoded JSON.

        Session headers are sent when logged in; otherwise a fresh guest
        token is fetched for the request.

        When the response carries ``errors``, ``raise_login_required`` is
        called if the messages mention 'not authorized'; otherwise
        ExtractorError is raised with the de-duplicated messages.
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id),
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys de-duplicates while keeping first-seen order, so the
            # resulting message is deterministic (a set would not be)
            errors = ', '.join(dict.fromkeys(traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for ``media_id``; subclasses must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Query a GraphQL endpoint and return the ``data`` member of the response."""
        data = self._build_graphql_query(media_id)
        # Each top-level value is sent as its own compact JSON string
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
339
340
class TwitterCardIE(InfoExtractor):
    """Matches card and embedded-video URLs and hands the status ID over to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'},
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a URL result pointing at the status page for the matched ID, to be handled by TwitterIE."""
        tweet_id = self._match_id(url)
        # No extraction happens here: the canonical status URL is delegated to TwitterIE
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
465
466
467 class TwitterIE(TwitterBaseIE):
468 IE_NAME = 'twitter'
469 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
470
471 _TESTS = [{
472 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
473 'info_dict': {
474 'id': '643211870443208704',
475 'display_id': '643211948184596480',
476 'ext': 'mp4',
477 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
478 'thumbnail': r're:^https?://.*\.jpg',
479 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
480 'channel_id': '549749560',
481 'uploader': 'FREE THE NIPPLE',
482 'uploader_id': 'freethenipple',
483 'duration': 12.922,
484 'timestamp': 1442188653,
485 'upload_date': '20150913',
486 'uploader_url': 'https://twitter.com/freethenipple',
487 'comment_count': int,
488 'repost_count': int,
489 'like_count': int,
490 'tags': [],
491 'age_limit': 18,
492 '_old_archive_ids': ['twitter 643211948184596480'],
493 },
494 'skip': 'Requires authentication',
495 }, {
496 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
497 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
498 'info_dict': {
499 'id': '657991469417025536',
500 'ext': 'mp4',
501 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
502 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
503 'thumbnail': r're:^https?://.*\.png',
504 'uploader': 'Gifs',
505 'uploader_id': 'giphz',
506 },
507 'expected_warnings': ['height', 'width'],
508 'skip': 'Account suspended',
509 }, {
510 'url': 'https://twitter.com/starwars/status/665052190608723968',
511 'info_dict': {
512 'id': '665052190608723968',
513 'display_id': '665052190608723968',
514 'ext': 'mp4',
515 'title': r're:Star Wars.*A new beginning is coming December 18.*',
516 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
517 'channel_id': '20106852',
518 'uploader_id': 'starwars',
519 'uploader': r're:Star Wars.*',
520 'timestamp': 1447395772,
521 'upload_date': '20151113',
522 'uploader_url': 'https://twitter.com/starwars',
523 'comment_count': int,
524 'repost_count': int,
525 'like_count': int,
526 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
527 'age_limit': 0,
528 '_old_archive_ids': ['twitter 665052190608723968'],
529 },
530 }, {
531 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
532 'info_dict': {
533 'id': '705235433198714880',
534 'ext': 'mp4',
535 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
536 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
537 'uploader_id': 'BTNBrentYarina',
538 'uploader': 'Brent Yarina',
539 'timestamp': 1456976204,
540 'upload_date': '20160303',
541 'uploader_url': 'https://twitter.com/BTNBrentYarina',
542 'comment_count': int,
543 'repost_count': int,
544 'like_count': int,
545 'tags': [],
546 'age_limit': 0,
547 },
548 'params': {
549 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
550 # Test case of TwitterCardIE
551 'skip_download': True,
552 },
553 'skip': 'Dead external link',
554 }, {
555 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
556 'info_dict': {
557 'id': '700207414000242688',
558 'display_id': '700207533655363584',
559 'ext': 'mp4',
560 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
561 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
562 'thumbnail': r're:^https?://.*\.jpg',
563 'channel_id': '1383165541',
564 'uploader': 'jaydin donte geer',
565 'uploader_id': 'jaydingeer',
566 'duration': 30.0,
567 'timestamp': 1455777459,
568 'upload_date': '20160218',
569 'uploader_url': 'https://twitter.com/jaydingeer',
570 'comment_count': int,
571 'repost_count': int,
572 'like_count': int,
573 'tags': ['Damndaniel'],
574 'age_limit': 0,
575 '_old_archive_ids': ['twitter 700207533655363584'],
576 },
577 }, {
578 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
579 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
580 'info_dict': {
581 'id': 'MIOxnrUteUd',
582 'ext': 'mp4',
583 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
584 'uploader': 'TAKUMA',
585 'uploader_id': '1004126642786242560',
586 'timestamp': 1402826626,
587 'upload_date': '20140615',
588 'thumbnail': r're:^https?://.*\.jpg',
589 'alt_title': 'Vine by TAKUMA',
590 'comment_count': int,
591 'repost_count': int,
592 'like_count': int,
593 'view_count': int,
594 },
595 'add_ie': ['Vine'],
596 }, {
597 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
598 'info_dict': {
599 'id': '717462543795523584',
600 'display_id': '719944021058060289',
601 'ext': 'mp4',
602 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
603 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
604 'channel_id': '701615052',
605 'uploader_id': 'CaptainAmerica',
606 'uploader': 'Captain America',
607 'duration': 3.17,
608 'timestamp': 1460483005,
609 'upload_date': '20160412',
610 'uploader_url': 'https://twitter.com/CaptainAmerica',
611 'thumbnail': r're:^https?://.*\.jpg',
612 'comment_count': int,
613 'repost_count': int,
614 'like_count': int,
615 'tags': [],
616 'age_limit': 0,
617 '_old_archive_ids': ['twitter 719944021058060289'],
618 },
619 }, {
620 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
621 'info_dict': {
622 'id': '1zqKVVlkqLaKB',
623 'ext': 'mp4',
624 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
625 'upload_date': '20160923',
626 'uploader_id': '1PmKqpJdOJQoY',
627 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
628 'timestamp': 1474613214,
629 'thumbnail': r're:^https?://.*\.jpg',
630 },
631 'add_ie': ['Periscope'],
632 'skip': 'Broadcast not found',
633 }, {
634 # has mp4 formats via mobile API
635 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
636 'info_dict': {
637 'id': '852077943283097602',
638 'ext': 'mp4',
639 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
640 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
641 'channel_id': '2526757026',
642 'uploader': 'عالم الأخبار',
643 'uploader_id': 'news_al3alm',
644 'duration': 277.4,
645 'timestamp': 1492000653,
646 'upload_date': '20170412',
647 'display_id': '852138619213144067',
648 'age_limit': 0,
649 'uploader_url': 'https://twitter.com/news_al3alm',
650 'thumbnail': r're:^https?://.*\.jpg',
651 'tags': [],
652 'repost_count': int,
653 'like_count': int,
654 'comment_count': int,
655 '_old_archive_ids': ['twitter 852138619213144067'],
656 },
657 }, {
658 'url': 'https://twitter.com/i/web/status/910031516746514432',
659 'info_dict': {
660 'id': '910030238373089285',
661 'display_id': '910031516746514432',
662 'ext': 'mp4',
663 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
664 'thumbnail': r're:^https?://.*\.jpg',
665 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
666 'channel_id': '2319432498',
667 'uploader': 'Préfet de Guadeloupe',
668 'uploader_id': 'Prefet971',
669 'duration': 47.48,
670 'timestamp': 1505803395,
671 'upload_date': '20170919',
672 'uploader_url': 'https://twitter.com/Prefet971',
673 'comment_count': int,
674 'repost_count': int,
675 'like_count': int,
676 'tags': ['Maria'],
677 'age_limit': 0,
678 '_old_archive_ids': ['twitter 910031516746514432'],
679 },
680 'params': {
681 'skip_download': True, # requires ffmpeg
682 },
683 }, {
684 # card via api.twitter.com/1.1/videos/tweet/config
685 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
686 'info_dict': {
687 'id': '1001551417340022785',
688 'display_id': '1001551623938805763',
689 'ext': 'mp4',
690 'title': 're:.*?Shep is on a roll today.*?',
691 'thumbnail': r're:^https?://.*\.jpg',
692 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
693 'channel_id': '255036353',
694 'uploader': 'Lis Power',
695 'uploader_id': 'LisPower1',
696 'duration': 111.278,
697 'timestamp': 1527623489,
698 'upload_date': '20180529',
699 'uploader_url': 'https://twitter.com/LisPower1',
700 'comment_count': int,
701 'repost_count': int,
702 'like_count': int,
703 'tags': [],
704 'age_limit': 0,
705 '_old_archive_ids': ['twitter 1001551623938805763'],
706 },
707 'params': {
708 'skip_download': True, # requires ffmpeg
709 },
710 }, {
711 'url': 'https://twitter.com/foobar/status/1087791357756956680',
712 'info_dict': {
713 'id': '1087791272830607360',
714 'display_id': '1087791357756956680',
715 'ext': 'mp4',
716 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
717 'thumbnail': r're:^https?://.*\.jpg',
718 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
719 'uploader': 'X',
720 'uploader_id': 'X',
721 'duration': 61.567,
722 'timestamp': 1548184644,
723 'upload_date': '20190122',
724 'uploader_url': 'https://twitter.com/X',
725 'comment_count': int,
726 'repost_count': int,
727 'like_count': int,
728 'view_count': int,
729 'tags': [],
730 'age_limit': 0,
731 },
732 'skip': 'This Tweet is unavailable',
733 }, {
734 # not available in Periscope
735 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
736 'info_dict': {
737 'id': '1vOGwqejwoWxB',
738 'ext': 'mp4',
739 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
740 'uploader': 'Vivi',
741 'uploader_id': '1eVjYOLGkGrQL',
742 'thumbnail': r're:^https?://.*\.jpg',
743 'tags': ['EduTECH2019'],
744 'view_count': int,
745 },
746 'add_ie': ['TwitterBroadcast'],
747 'skip': 'Broadcast no longer exists',
748 }, {
749 # unified card
750 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
751 'info_dict': {
752 'id': '1349774757969989634',
753 'display_id': '1349794411333394432',
754 'ext': 'mp4',
755 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
756 'thumbnail': r're:^https?://.*\.jpg',
757 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
758 'channel_id': '18552281',
759 'uploader': 'Brooklyn Nets',
760 'uploader_id': 'BrooklynNets',
761 'duration': 324.484,
762 'timestamp': 1610651040,
763 'upload_date': '20210114',
764 'uploader_url': 'https://twitter.com/BrooklynNets',
765 'comment_count': int,
766 'repost_count': int,
767 'like_count': int,
768 'tags': [],
769 'age_limit': 0,
770 '_old_archive_ids': ['twitter 1349794411333394432'],
771 },
772 'params': {
773 'skip_download': True,
774 },
775 }, {
776 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
777 'info_dict': {
778 'id': '1577855447914409984',
779 'display_id': '1577855540407197696',
780 'ext': 'mp4',
781 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
782 'description': 'md5:b9c3699335447391d11753ab21c70a74',
783 'upload_date': '20221006',
784 'channel_id': '143077138',
785 'uploader': 'Oshtru',
786 'uploader_id': 'oshtru',
787 'uploader_url': 'https://twitter.com/oshtru',
788 'thumbnail': r're:^https?://.*\.jpg',
789 'duration': 30.03,
790 'timestamp': 1665025050,
791 'comment_count': int,
792 'repost_count': int,
793 'like_count': int,
794 'tags': [],
795 'age_limit': 0,
796 '_old_archive_ids': ['twitter 1577855540407197696'],
797 },
798 'params': {'skip_download': True},
799 }, {
800 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
801 'info_dict': {
802 'id': '1577719286659006464',
803 'title': 'Ultima Reload - Test',
804 'description': 'Test https://t.co/Y3KEZD7Dad',
805 'channel_id': '168922496',
806 'uploader': 'Ultima Reload',
807 'uploader_id': 'UltimaShadowX',
808 'uploader_url': 'https://twitter.com/UltimaShadowX',
809 'upload_date': '20221005',
810 'timestamp': 1664992565,
811 'comment_count': int,
812 'repost_count': int,
813 'like_count': int,
814 'tags': [],
815 'age_limit': 0,
816 },
817 'playlist_count': 4,
818 'params': {'skip_download': True},
819 }, {
820 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
821 'info_dict': {
822 'id': '1575559336759263233',
823 'display_id': '1575560063510810624',
824 'ext': 'mp4',
825 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
826 'thumbnail': r're:^https?://.*\.jpg',
827 'description': 'md5:95aea692fda36a12081b9629b02daa92',
828 'channel_id': '1094109584',
829 'uploader': 'Max Olson',
830 'uploader_id': 'MesoMax919',
831 'uploader_url': 'https://twitter.com/MesoMax919',
832 'duration': 21.321,
833 'timestamp': 1664477766,
834 'upload_date': '20220929',
835 'comment_count': int,
836 'repost_count': int,
837 'like_count': int,
838 'tags': ['HurricaneIan'],
839 'age_limit': 0,
840 '_old_archive_ids': ['twitter 1575560063510810624'],
841 },
842 }, {
843 # Adult content, fails if not logged in
844 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
845 'info_dict': {
846 'id': '1575199163847000068',
847 'display_id': '1575199173472927762',
848 'ext': 'mp4',
849 'title': str,
850 'description': str,
851 'channel_id': '1217167793541480450',
852 'uploader': str,
853 'uploader_id': 'Rizdraws',
854 'uploader_url': 'https://twitter.com/Rizdraws',
855 'upload_date': '20220928',
856 'timestamp': 1664391723,
857 'thumbnail': r're:^https?://.+\.jpg',
858 'like_count': int,
859 'repost_count': int,
860 'comment_count': int,
861 'age_limit': 18,
862 'tags': [],
863 '_old_archive_ids': ['twitter 1575199173472927762'],
864 },
865 'params': {'skip_download': 'The media could not be played'},
866 'skip': 'Requires authentication',
867 }, {
868 # Playlist result only with graphql API
869 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
870 'playlist_mincount': 2,
871 'info_dict': {
872 'id': '1395079556562706435',
873 'title': str,
874 'tags': [],
875 'channel_id': '21539378',
876 'uploader': str,
877 'like_count': int,
878 'upload_date': '20210519',
879 'age_limit': 0,
880 'repost_count': int,
881 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
882 'uploader_id': 'Srirachachau',
883 'comment_count': int,
884 'uploader_url': 'https://twitter.com/Srirachachau',
885 'timestamp': 1621447860,
886 },
887 }, {
888 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
889 'playlist_mincount': 2,
890 'info_dict': {
891 'id': '1578353380363501568',
892 'title': str,
893 'channel_id': '2195866214',
894 'uploader_id': 'DavidToons_',
895 'repost_count': int,
896 'like_count': int,
897 'uploader': str,
898 'timestamp': 1665143744,
899 'uploader_url': 'https://twitter.com/DavidToons_',
900 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
901 'tags': [],
902 'comment_count': int,
903 'upload_date': '20221007',
904 'age_limit': 0,
905 },
906 }, {
907 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
908 'playlist_count': 2,
909 'info_dict': {
910 'id': '1578401165338976258',
911 'title': str,
912 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
913 'channel_id': '19338359',
914 'uploader': str,
915 'uploader_id': 'primevideouk',
916 'timestamp': 1665155137,
917 'upload_date': '20221007',
918 'age_limit': 0,
919 'uploader_url': 'https://twitter.com/primevideouk',
920 'comment_count': int,
921 'repost_count': int,
922 'like_count': int,
923 'tags': ['TheRingsOfPower'],
924 },
925 }, {
926 # Twitter Spaces
927 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
928 'info_dict': {
929 'id': '1lPJqmBeeNAJb',
930 'ext': 'm4a',
931 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
932 'uploader': r're:Monique Camarra.+?',
933 'uploader_id': 'MoniqueCamarra',
934 'live_status': 'was_live',
935 'release_timestamp': 1658417414,
936 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
937 'timestamp': 1658407771,
938 'release_date': '20220721',
939 'upload_date': '20220721',
940 },
941 'add_ie': ['TwitterSpaces'],
942 'params': {'skip_download': 'm3u8'},
943 'skip': 'Requires authentication',
944 }, {
945 # URL specifies video number but --yes-playlist
946 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
947 'playlist_mincount': 2,
948 'info_dict': {
949 'id': '1600649710662213632',
950 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
951 'timestamp': 1670459604.0,
952 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
953 'comment_count': int,
954 'uploader_id': 'CTVJLaidlaw',
955 'channel_id': '80082014',
956 'repost_count': int,
957 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
958 'upload_date': '20221208',
959 'age_limit': 0,
960 'uploader': 'Jocelyn Laidlaw',
961 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
962 'like_count': int,
963 },
964 }, {
965 # URL specifies video number and --no-playlist
966 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
967 'info_dict': {
968 'id': '1600649511827013632',
969 'ext': 'mp4',
970 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
971 'thumbnail': r're:^https?://.+\.jpg',
972 'timestamp': 1670459604.0,
973 'channel_id': '80082014',
974 'uploader_id': 'CTVJLaidlaw',
975 'uploader': 'Jocelyn Laidlaw',
976 'repost_count': int,
977 'comment_count': int,
978 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
979 'duration': 102.226,
980 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
981 'display_id': '1600649710662213632',
982 'like_count': int,
983 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
984 'upload_date': '20221208',
985 'age_limit': 0,
986 '_old_archive_ids': ['twitter 1600649710662213632'],
987 },
988 'params': {'noplaylist': True},
989 }, {
990 # The id in the URL points to a TweetWithVisibilityResults entity, which wraps the actual Tweet
991 # Note that the extracted id differs from the id in the URL
992 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
993 'info_dict': {
994 'id': '1621117577354424321',
995 'display_id': '1621117700482416640',
996 'ext': 'mp4',
997 'title': '뽀 - 아 최우제 이동속도 봐',
998 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
999 'duration': 24.598,
1000 'channel_id': '1281839411068432384',
1001 'uploader': '뽀',
1002 'uploader_id': 's2FAKER',
1003 'uploader_url': 'https://twitter.com/s2FAKER',
1004 'upload_date': '20230202',
1005 'timestamp': 1675339553.0,
1006 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1007 'age_limit': 18,
1008 'tags': [],
1009 'like_count': int,
1010 'repost_count': int,
1011 'comment_count': int,
1012 '_old_archive_ids': ['twitter 1621117700482416640'],
1013 },
1014 'skip': 'Requires authentication',
1015 }, {
1016 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1017 'info_dict': {
1018 'id': '1599108643743473680',
1019 'display_id': '1599108751385972737',
1020 'ext': 'mp4',
1021 'title': '\u06ea - \U0001F48B',
1022 'channel_id': '1347791436809441283',
1023 'uploader_url': 'https://twitter.com/hlo_again',
1024 'like_count': int,
1025 'uploader_id': 'hlo_again',
1026 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1027 'repost_count': int,
1028 'duration': 9.531,
1029 'comment_count': int,
1030 'upload_date': '20221203',
1031 'age_limit': 0,
1032 'timestamp': 1670092210.0,
1033 'tags': [],
1034 'uploader': '\u06ea',
1035 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1036 '_old_archive_ids': ['twitter 1599108751385972737'],
1037 },
1038 'params': {'noplaylist': True},
1039 }, {
1040 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1041 'info_dict': {
1042 'id': '1600009362759733248',
1043 'display_id': '1600009574919962625',
1044 'ext': 'mp4',
1045 'channel_id': '211814412',
1046 'uploader_url': 'https://twitter.com/MunTheShinobi',
1047 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1048 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1049 'age_limit': 0,
1050 'uploader': 'Mün',
1051 'repost_count': int,
1052 'upload_date': '20221206',
1053 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1054 'comment_count': int,
1055 'like_count': int,
1056 'tags': [],
1057 'uploader_id': 'MunTheShinobi',
1058 'duration': 139.987,
1059 'timestamp': 1670306984.0,
1060 '_old_archive_ids': ['twitter 1600009574919962625'],
1061 },
1062 }, {
1063 # retweeted_status (private)
1064 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1065 'info_dict': {
1066 'id': '1623274794488659969',
1067 'display_id': '1623739803874349067',
1068 'ext': 'mp4',
1069 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1070 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1071 'uploader': 'Johnny Bullets',
1072 'uploader_id': 'Johnnybull3ts',
1073 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1074 'age_limit': 0,
1075 'tags': [],
1076 'duration': 8.033,
1077 'timestamp': 1675853859.0,
1078 'upload_date': '20230208',
1079 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1080 'like_count': int,
1081 'repost_count': int,
1082 },
1083 'skip': 'Protected tweet',
1084 }, {
1085 # retweeted_status
1086 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1087 'info_dict': {
1088 'id': '1694928337846538240',
1089 'ext': 'mp4',
1090 'display_id': '1695424220702888009',
1091 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1092 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1093 'channel_id': '15212187',
1094 'uploader': 'Benny Johnson',
1095 'uploader_id': 'bennyjohnson',
1096 'uploader_url': 'https://twitter.com/bennyjohnson',
1097 'age_limit': 0,
1098 'tags': [],
1099 'duration': 45.001,
1100 'timestamp': 1692962814.0,
1101 'upload_date': '20230825',
1102 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1103 'like_count': int,
1104 'repost_count': int,
1105 'comment_count': int,
1106 '_old_archive_ids': ['twitter 1695424220702888009'],
1107 },
1108 }, {
1109 # retweeted_status w/ legacy API
1110 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1111 'info_dict': {
1112 'id': '1694928337846538240',
1113 'ext': 'mp4',
1114 'display_id': '1695424220702888009',
1115 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1116 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1117 'channel_id': '15212187',
1118 'uploader': 'Benny Johnson',
1119 'uploader_id': 'bennyjohnson',
1120 'uploader_url': 'https://twitter.com/bennyjohnson',
1121 'age_limit': 0,
1122 'tags': [],
1123 'duration': 45.001,
1124 'timestamp': 1692962814.0,
1125 'upload_date': '20230825',
1126 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1127 'like_count': int,
1128 'repost_count': int,
1129 '_old_archive_ids': ['twitter 1695424220702888009'],
1130 },
1131 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1132 }, {
1133 # Broadcast embedded in tweet
1134 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1135 'info_dict': {
1136 'id': '1rmxPMjLzAXKN',
1137 'ext': 'mp4',
1138 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1139 'uploader': 'Jessica Dobson',
1140 'uploader_id': 'JessicaDobsonWX',
1141 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1142 'timestamp': 1701566398,
1143 'upload_date': '20231203',
1144 'live_status': 'was_live',
1145 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1146 'concurrent_view_count': int,
1147 'view_count': int,
1148 },
1149 'add_ie': ['TwitterBroadcast'],
1150 }, {
1151 # Animated gif and quote tweet video
1152 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1153 'playlist_mincount': 2,
1154 'info_dict': {
1155 'id': '1696256659889565950',
1156 'title': 'BAKOON - https://t.co/zom968d0a0',
1157 'description': 'https://t.co/zom968d0a0',
1158 'tags': [],
1159 'channel_id': '1263540390',
1160 'uploader': 'BAKOON',
1161 'uploader_id': 'BAKKOOONN',
1162 'uploader_url': 'https://twitter.com/BAKKOOONN',
1163 'age_limit': 18,
1164 'timestamp': 1693254077.0,
1165 'upload_date': '20230828',
1166 'like_count': int,
1167 'comment_count': int,
1168 'repost_count': int,
1169 },
1170 'skip': 'Requires authentication',
1171 }, {
1172 # "stale tweet" with typename "TweetWithVisibilityResults"
1173 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1174 'md5': '511377ff8dfa7545307084dca4dce319',
1175 'info_dict': {
1176 'id': '1724883339285544960',
1177 'ext': 'mp4',
1178 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1179 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1180 'display_id': '1724884212803834154',
1181 'channel_id': '337808606',
1182 'uploader': 'Robert F. Kennedy Jr',
1183 'uploader_id': 'RobertKennedyJr',
1184 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1185 'upload_date': '20231115',
1186 'timestamp': 1700079417.0,
1187 'duration': 341.048,
1188 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1189 'tags': ['Kennedy24'],
1190 'repost_count': int,
1191 'like_count': int,
1192 'comment_count': int,
1193 'age_limit': 0,
1194 '_old_archive_ids': ['twitter 1724884212803834154'],
1195 },
1196 }, {
1197 # x.com
1198 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1199 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1200 'info_dict': {
1201 'id': '1790637589910654976',
1202 'ext': 'mp4',
1203 'title': 'Historic Vids - One of the most intense moments in history',
1204 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1205 'display_id': '1790637656616943991',
1206 'uploader': 'Historic Vids',
1207 'uploader_id': 'historyinmemes',
1208 'uploader_url': 'https://twitter.com/historyinmemes',
1209 'channel_id': '855481986290524160',
1210 'upload_date': '20240515',
1211 'timestamp': 1715756260.0,
1212 'duration': 15.488,
1213 'tags': [],
1214 'comment_count': int,
1215 'repost_count': int,
1216 'like_count': int,
1217 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1218 'age_limit': 0,
1219 '_old_archive_ids': ['twitter 1790637656616943991'],
1220 },
1221 }, {
1222 # onion route
1223 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1224 'only_matching': True,
1225 }, {
1226 # Twitch Clip Embed
1227 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1228 'only_matching': True,
1229 }, {
1230 # promo_video_website card
1231 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1232 'only_matching': True,
1233 }, {
1234 # promo_video_convo card
1235 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1236 'only_matching': True,
1237 }, {
1238 # appplayer card
1239 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1240 'only_matching': True,
1241 }, {
1242 # video_direct_message card
1243 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1244 'only_matching': True,
1245 }, {
1246 # poll2choice_video card
1247 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1248 'only_matching': True,
1249 }, {
1250 # poll3choice_video card
1251 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1252 'only_matching': True,
1253 }, {
1254 # poll4choice_video card
1255 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1256 'only_matching': True,
1257 }]
1258
# Captures the run of digits that follows "_video/" in a URL; used to recover
# a media id from video variant URLs of the syndication response.
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1260
@property
def _GRAPHQL_ENDPOINT(self):
    """GraphQL endpoint path: ``TweetDetail`` when logged in, else ``TweetResultByRestId``."""
    return (
        'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' if self.is_logged_in
        else '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId')
1266
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-API-style status dict.

    :param data: GraphQL response data for the tweet
    :param twid: tweet id; used to select the conversation entry and in warnings
    :return: the tweet's ``legacy`` dict, augmented with ``user``, ``card``,
        ``quoted_status`` and ``retweeted_status`` where present
    :raises ExtractorError: if the result is a tombstone or an unavailable tweet
    """
    if self.is_logged_in:
        # Logged-in responses are a conversation; pick the entry for this tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    extra_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweeted_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweeted_user or {}

    # Turn the card's list of {key, value} bindings into a key -> value mapping
    card_bindings = traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    binding_values = {item.get('key'): item.get('value') for item in card_bindings}
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1314
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for a tweet.

    The logged-in payload addresses the tweet as ``focalTweetId``; the
    guest payload uses ``tweetId`` and additionally carries ``fieldToggles``.

    :param media_id: id of the tweet to query
    :return: dict with ``variables`` and ``features`` (plus ``fieldToggles`` for guests)
    """
    if not self.is_logged_in:
        return {
            'variables': {
                'tweetId': media_id,
                'withCommunity': False,
                'includePromotedContent': False,
                'withVoice': False,
            },
            'features': {
                'creator_subscriptions_tweet_preview_api_enabled': True,
                'tweetypie_unmention_optimization_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
                'view_counts_everywhere_api_enabled': True,
                'longform_notetweets_consumption_enabled': True,
                'responsive_web_twitter_article_tweet_consumption_enabled': False,
                'tweet_awards_web_tipping_enabled': False,
                'freedom_of_speech_not_reach_fetch_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
                'longform_notetweets_rich_text_read_enabled': True,
                'longform_notetweets_inline_media_enabled': True,
                'responsive_web_graphql_exclude_directive_enabled': True,
                'verified_phone_label_enabled': False,
                'responsive_web_media_download_video_enabled': False,
                'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
                'responsive_web_graphql_timeline_navigation_enabled': True,
                'responsive_web_enhance_cards_enabled': False,
            },
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    return {
        'variables': {
            'focalTweetId': media_id,
            'includePromotedContent': True,
            'with_rux_injections': False,
            'withBirdwatchNotes': True,
            'withCommunity': True,
            'withDownvotePerspective': False,
            'withQuickPromoteEligibilityTweetFields': True,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withSuperFollowsTweetFields': True,
            'withSuperFollowsUserFields': True,
            'withV2Timeline': True,
            'withVoice': True,
        },
        'features': {
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_graphql_timeline_navigation_enabled': False,
            'responsive_web_text_conversations_enabled': False,
            'responsive_web_uc_gql_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'tweetypie_unmention_optimization_enabled': True,
            'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
            'verified_phone_label_enabled': False,
            'vibe_api_enabled': True,
        },
    }
1379
def _call_syndication_api(self, twid):
    """Fetch a tweet from the syndication endpoint and reshape it like legacy/graphql results.

    :param twid: id of the tweet to fetch
    :return: the response dict with ``extended_entities.media`` filled from the
        ``mediaDetails`` of the tweet and of its quoted tweet
    :raises ExtractorError: if the endpoint returns an empty JSON response
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    def with_media_id(detail):
        # Use the media id found in a variant URL, falling back to the tweet id
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        return detail

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    status['extended_entities'] = {'media': [with_media_id(detail) for detail in media_details]}

    return status
1401
def _extract_status(self, twid):
    """Fetch the status dict for a tweet using the selected API.

    GraphQL is used when logged in or when selected; an HTTP 429 from the
    graphql/legacy request falls back to the syndication endpoint.

    :param twid: id of the tweet
    :return: the retweeted status if the result contains one, otherwise the
        status itself (``{}`` if neither is a dict)
    :raises ExtractorError: on an invalid API selection or a non-429 API error
    """
    api = self._selected_api
    if api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as e:
        rate_limited = isinstance(e.cause, HTTPError) and e.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1427
def _real_extract(self, url):
    """Extract the media attached to, quoted by or carded in a tweet.

    :param url: tweet URL, optionally ending in a media index
    :return: a single info dict, a playlist result when several entries are
        found, a url result for the first expanded link when the tweet has no
        media, or the bare metadata dict if ``raise_no_formats`` returns
    :raises ExtractorError: if the URL-specified media is missing or not a video
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', description)
    user = status.get('user') or {}
    uploader = user.get('name')
    uploader_id = user.get('screen_name')
    if uploader:
        title = f'{uploader} - {title}'

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        # Build the per-media fields (formats, subtitles, thumbnails, ...) from a media entity
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats, subtitles = [], {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            variant_formats, variant_subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, variant_subs)
            formats.extend(variant_formats)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # Every named size, followed by the original-resolution image
            named_sizes = [*media.get('sizes', {}).items(), ('orig', media.get('original_info') or {})]
            thumbnails = [{
                'id': name,
                'url': update_url_query(media_url, {'name': name}),
                'width': int_or_none(size.get('w') or size.get('width')),
                'height': int_or_none(size.get('h') or size.get('height')),
            } for name, size in named_sizes]

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        # Generator yielding the entries described by the tweet's card, if any
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(key):
            binding = binding_values.get(key) or {}
            return try_get(binding, lambda x: x[x['type'].lower() + '_value'])

        # Only the part after the last ':' identifies the card type
        card_name = card['name'].rpartition(':')[2]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            media_entities = traverse_obj(unified_card, ('media_entities', ...), expected_type=dict)
            yield from map(extract_from_video_info, media_entities)
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            if is_amplify:
                vmap_url = get_binding_value('amplify_url_vmap')
            else:
                vmap_url = get_binding_value('player_stream_url')
            content_id = get_binding_value(f'{card_name if is_amplify else "player"}_content_id')
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] or 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        if desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        position = next((
            index for index, entry in enumerate(videos, 1)
            if entry.get('id') == desired_obj.get('id')), None)
        if position is not None:
            if position == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{position}'

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]

    if not entries:
        # No media found: hand over the first expanded link unless it is this very URL
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if expanded_url and expanded_url != url:
            return self.url_result(expanded_url, display_id=twid, **info)
        self.raise_no_formats('No video could be found in this tweet', expected=True)
        return info

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1611
1612
# Extractor for Twitter Amplify pages hosted on amp.twimg.com
class TwitterAmplifyIE(TwitterBaseIE):
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's ``twitter:*`` meta tags.

        :param url: amp.twimg.com video URL
        :return: info dict with ``id``, ``title``, ``formats`` and ``thumbnails``
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the "twitter:<target>:width"/":height" meta values as ints (None if absent)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list: indexing formats[0] unconditionally
        # would raise IndexError instead of letting the caller handle "no formats"
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1668
1669
# Extractor for broadcasts under <twitter host>/i/broadcasts/<13-character id>
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a broadcast's metadata and, unless it is upcoming, its m3u8 formats.

        :param url: broadcast URL
        :return: info dict; ``formats`` is only set when the broadcast is not upcoming
        :raises ExtractorError: if the API returns no data for the broadcast
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Prefer values from the API response over the parsed defaults
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The "type" query parameter of the playlist URL, if any, becomes the m3u8 id
        playlist_query = urllib.parse.parse_qs(urllib.parse.urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1745
1746
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for Twitter Spaces URLs of the form <base>/i/spaces/<13-char id>
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lowercased Space "state" from the metadata -> live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        # Build the variables/features payload describing a query for one Space.
        # NOTE(review): presumably consumed by the base class's GraphQL helper - confirm
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        # Fetch the Space's metadata (login required), work out its live
        # status, extract audio formats when a stream is available and
        # return the assembled info dict.
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # A missing or unrecognised state leaves live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among source.noRedirectPlaybackUrl / source.location
            playback_url = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if playback_url:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    playback_url, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)
            for fmt in formats:
                # The formats are flagged as audio-only AAC
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # Both timestamps are divided by 1000 (scale=1000) before being returned
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1885
1886
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or the internal "tco:<code>" form) to their target URL
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        # Request the short link, take the final URL of the response and
        # hand it on as a URL result, dropping the unsafe-link warning
        # prefix when the response landed on that page.
        match = self._match_valid_url(url)
        eid = match.group('eid')
        shortcode = eid or match.group('id')
        if eid:
            # "tco:<code>" inputs are not fetchable as-is; rebuild the t.co URL
            url = self._BASE_URL + shortcode

        # NOTE(review): the curl User-Agent is presumably what makes t.co answer
        # with a plain redirect - confirm
        response = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'})
        resolved_url = response.url

        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if resolved_url.startswith(unsafe_link_prefix):
            resolved_url = resolved_url.replace(unsafe_link_prefix, '')
        return self.url_result(resolved_url)