]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
fix motherless
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import functools
2 import json
3 import random
4 import re
5 import urllib.parse
6
7 from .common import InfoExtractor
8 from .periscope import PeriscopeBaseIE, PeriscopeIE
9 from ..networking.exceptions import HTTPError
10 from ..utils import (
11 ExtractorError,
12 dict_get,
13 filter_dict,
14 float_or_none,
15 format_field,
16 int_or_none,
17 join_nonempty,
18 make_archive_id,
19 remove_end,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
29 )
30
31
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter/X extractors: format building, login flow and API access."""

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header (see _set_base_headers)
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api after every successful login step
    _flow_token = None

    # Request body sent with the first login API call (flow_name=login) in _perform_login
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build the formats and subtitles for a single media variant.

        @param variant   mapping providing 'url' and optionally 'bitrate' or 'bit_rate'
        @param video_id  ID passed through to the m3u8 downloader
        @returns         (formats, subtitles); both are empty if the variant has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}

        if '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only formats lacking a bitrate: take it from the format_id when it has the hls-audio-<digits> form
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs

        # Direct (non-HLS) URL: a single format; a bitrate of 0 is treated as unknown
        tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
        f = {
            'url': variant_url,
            'format_id': join_nonempty('http', tbr),
            'tbr': tbr,
        }
        self._search_dimensions_in_video_url(f, variant_url)
        return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and collect the formats/subtitles of all its video variants.

        @returns  (formats, subtitles); both are empty if vmap_url is not a valid URL
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-decoded in place before being used
            video_variant.attrib['url'] = urllib.parse.unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also consider the MediaFile URL, unless it is absent or already covered by a variant
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width' and 'height' on a_format if video_url contains a /<width>x<height>/ path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an 'auth_token' cookie is present for the current API base URL."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    # XXX: Temporary workaround until twitter.com => x.com migration is completed
    def _real_initialize(self):
        if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
            return
        # User has not yet been migrated to x.com and has passed twitter.com cookies
        # NOTE: these are set on the base class, so they apply to every subclass as well
        TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
        TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'

    @functools.cached_property
    def _selected_api(self):
        """First value of the 'api' configuration argument of the 'Twitter' extractor ('graphql' if unset)."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from guest/activate.json.

        @raises ExtractorError  if the response does not contain a string 'guest_token'
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Build the Authorization and x-csrf-token headers.

        The legacy bearer token is only used when legacy is truthy and the user is not logged in.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        # NOTE(review): filter_dict presumably drops 'x-csrf-token' when there is no ct0 cookie - confirm
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    # NOTE: the shared `query={}` default is kept for interface stability; it is never mutated in this method
    def _call_login_api(self, note, headers, query={}, data=None):
        """Perform one step of the login flow and return the ID of the next subtask.

        Stores the flow token of the response in self._flow_token for the following step.

        @raises ExtractorError  if the API reports an error, the status is not 'success',
                                or the response lacks a subtask ID or a flow token
        """
        # HTTP 400 responses are accepted so that their error payload can be reported below
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        # Fail with a descriptive error instead of a bare KeyError on a malformed response
        flow_token = traverse_obj(response, 'flow_token')
        if not flow_token:
            raise ExtractorError('Twitter API did not return a flow token')
        self._flow_token = flow_token

        return subtask

    def _perform_login(self, username, password):
        """Log in with username/password by answering login subtasks until an auth_token cookie is set.

        Does nothing if already logged in. Raises ExtractorError (or requests cookie-based login
        via raise_login_required) if the flow reaches a subtask that cannot be completed here.
        """
        if self.is_logged_in:
            return

        guest_token = self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://x.com/',
            'Origin': 'https://x.com',
        }

        def build_login_json(*subtask_inputs):
            # self._flow_token is read at call time, i.e. the token from the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs,
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link',
                },
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        # Each iteration answers the subtask returned by the previous API call
        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username,
                                    },
                                },
                            }],
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link',
                        },
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false',
                        },
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Reaching this branch means the loop condition still found no auth_token cookie
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    # NOTE: the shared `query={}` default is kept for interface stability; it is never mutated in this method
    def _call_api(self, path, video_id, query={}, graphql=False):
        """Request `path` from the GraphQL API (graphql=True) or the 1.1 API and return the parsed JSON.

        Logged-in requests send session headers; otherwise a fresh guest token is fetched and sent.

        @raises ExtractorError  if the response carries a non-empty 'errors' entry; if any message
                                contains 'not authorized', self.raise_login_required is called instead
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id),
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys de-duplicates while keeping the API's message order,
            # so the resulting error text is the same on every run (a set is unordered)
            errors = ', '.join(dict.fromkeys(traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the mapping of GraphQL query parameters for media_id; subclasses must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the 'data' member of its response."""
        data = self._build_graphql_query(media_id)
        # Every parameter value is sent as a compact JSON string
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
340
341
class TwitterCardIE(InfoExtractor):
    """Matches Twitter card/video embed URLs and delegates them to TwitterIE via a status URL."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the numeric ID from the card URL over to TwitterIE as a /statuses/ URL."""
        tweet_id = self._match_id(url)
        status_url = 'https://twitter.com/statuses/' + tweet_id
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
466
467
468 class TwitterIE(TwitterBaseIE):
469 IE_NAME = 'twitter'
470 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
471
472 _TESTS = [{
473 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
474 'info_dict': {
475 'id': '643211870443208704',
476 'display_id': '643211948184596480',
477 'ext': 'mp4',
478 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
479 'thumbnail': r're:^https?://.*\.jpg',
480 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
481 'channel_id': '549749560',
482 'uploader': 'FREE THE NIPPLE',
483 'uploader_id': 'freethenipple',
484 'duration': 12.922,
485 'timestamp': 1442188653,
486 'upload_date': '20150913',
487 'uploader_url': 'https://twitter.com/freethenipple',
488 'comment_count': int,
489 'repost_count': int,
490 'like_count': int,
491 'tags': [],
492 'age_limit': 18,
493 '_old_archive_ids': ['twitter 643211948184596480'],
494 },
495 'skip': 'Requires authentication',
496 }, {
497 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
498 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
499 'info_dict': {
500 'id': '657991469417025536',
501 'ext': 'mp4',
502 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
503 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
504 'thumbnail': r're:^https?://.*\.png',
505 'uploader': 'Gifs',
506 'uploader_id': 'giphz',
507 },
508 'expected_warnings': ['height', 'width'],
509 'skip': 'Account suspended',
510 }, {
511 'url': 'https://twitter.com/starwars/status/665052190608723968',
512 'info_dict': {
513 'id': '665052190608723968',
514 'display_id': '665052190608723968',
515 'ext': 'mp4',
516 'title': r're:Star Wars.*A new beginning is coming December 18.*',
517 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
518 'channel_id': '20106852',
519 'uploader_id': 'starwars',
520 'uploader': r're:Star Wars.*',
521 'timestamp': 1447395772,
522 'upload_date': '20151113',
523 'uploader_url': 'https://twitter.com/starwars',
524 'comment_count': int,
525 'repost_count': int,
526 'like_count': int,
527 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
528 'age_limit': 0,
529 '_old_archive_ids': ['twitter 665052190608723968'],
530 },
531 }, {
532 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
533 'info_dict': {
534 'id': '705235433198714880',
535 'ext': 'mp4',
536 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
537 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
538 'uploader_id': 'BTNBrentYarina',
539 'uploader': 'Brent Yarina',
540 'timestamp': 1456976204,
541 'upload_date': '20160303',
542 'uploader_url': 'https://twitter.com/BTNBrentYarina',
543 'comment_count': int,
544 'repost_count': int,
545 'like_count': int,
546 'tags': [],
547 'age_limit': 0,
548 },
549 'params': {
550 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
551 # Test case of TwitterCardIE
552 'skip_download': True,
553 },
554 'skip': 'Dead external link',
555 }, {
556 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
557 'info_dict': {
558 'id': '700207414000242688',
559 'display_id': '700207533655363584',
560 'ext': 'mp4',
561 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
562 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
563 'thumbnail': r're:^https?://.*\.jpg',
564 'channel_id': '1383165541',
565 'uploader': 'jaydin donte geer',
566 'uploader_id': 'jaydingeer',
567 'duration': 30.0,
568 'timestamp': 1455777459,
569 'upload_date': '20160218',
570 'uploader_url': 'https://twitter.com/jaydingeer',
571 'comment_count': int,
572 'repost_count': int,
573 'like_count': int,
574 'tags': ['Damndaniel'],
575 'age_limit': 0,
576 '_old_archive_ids': ['twitter 700207533655363584'],
577 },
578 }, {
579 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
580 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
581 'info_dict': {
582 'id': 'MIOxnrUteUd',
583 'ext': 'mp4',
584 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
585 'uploader': 'TAKUMA',
586 'uploader_id': '1004126642786242560',
587 'timestamp': 1402826626,
588 'upload_date': '20140615',
589 'thumbnail': r're:^https?://.*\.jpg',
590 'alt_title': 'Vine by TAKUMA',
591 'comment_count': int,
592 'repost_count': int,
593 'like_count': int,
594 'view_count': int,
595 },
596 'add_ie': ['Vine'],
597 }, {
598 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
599 'info_dict': {
600 'id': '717462543795523584',
601 'display_id': '719944021058060289',
602 'ext': 'mp4',
603 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
604 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
605 'channel_id': '701615052',
606 'uploader_id': 'CaptainAmerica',
607 'uploader': 'Captain America',
608 'duration': 3.17,
609 'timestamp': 1460483005,
610 'upload_date': '20160412',
611 'uploader_url': 'https://twitter.com/CaptainAmerica',
612 'thumbnail': r're:^https?://.*\.jpg',
613 'comment_count': int,
614 'repost_count': int,
615 'like_count': int,
616 'tags': [],
617 'age_limit': 0,
618 '_old_archive_ids': ['twitter 719944021058060289'],
619 },
620 }, {
621 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
622 'info_dict': {
623 'id': '1zqKVVlkqLaKB',
624 'ext': 'mp4',
625 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
626 'upload_date': '20160923',
627 'uploader_id': '1PmKqpJdOJQoY',
628 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
629 'timestamp': 1474613214,
630 'thumbnail': r're:^https?://.*\.jpg',
631 },
632 'add_ie': ['Periscope'],
633 'skip': 'Broadcast not found',
634 }, {
635 # has mp4 formats via mobile API
636 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
637 'info_dict': {
638 'id': '852077943283097602',
639 'ext': 'mp4',
640 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
641 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
642 'channel_id': '2526757026',
643 'uploader': 'عالم الأخبار',
644 'uploader_id': 'news_al3alm',
645 'duration': 277.4,
646 'timestamp': 1492000653,
647 'upload_date': '20170412',
648 'display_id': '852138619213144067',
649 'age_limit': 0,
650 'uploader_url': 'https://twitter.com/news_al3alm',
651 'thumbnail': r're:^https?://.*\.jpg',
652 'tags': [],
653 'repost_count': int,
654 'like_count': int,
655 'comment_count': int,
656 '_old_archive_ids': ['twitter 852138619213144067'],
657 },
658 }, {
659 'url': 'https://twitter.com/i/web/status/910031516746514432',
660 'info_dict': {
661 'id': '910030238373089285',
662 'display_id': '910031516746514432',
663 'ext': 'mp4',
664 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
665 'thumbnail': r're:^https?://.*\.jpg',
666 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
667 'channel_id': '2319432498',
668 'uploader': 'Préfet de Guadeloupe',
669 'uploader_id': 'Prefet971',
670 'duration': 47.48,
671 'timestamp': 1505803395,
672 'upload_date': '20170919',
673 'uploader_url': 'https://twitter.com/Prefet971',
674 'comment_count': int,
675 'repost_count': int,
676 'like_count': int,
677 'tags': ['Maria'],
678 'age_limit': 0,
679 '_old_archive_ids': ['twitter 910031516746514432'],
680 },
681 'params': {
682 'skip_download': True, # requires ffmpeg
683 },
684 }, {
685 # card via api.twitter.com/1.1/videos/tweet/config
686 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
687 'info_dict': {
688 'id': '1001551417340022785',
689 'display_id': '1001551623938805763',
690 'ext': 'mp4',
691 'title': 're:.*?Shep is on a roll today.*?',
692 'thumbnail': r're:^https?://.*\.jpg',
693 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
694 'channel_id': '255036353',
695 'uploader': 'Lis Power',
696 'uploader_id': 'LisPower1',
697 'duration': 111.278,
698 'timestamp': 1527623489,
699 'upload_date': '20180529',
700 'uploader_url': 'https://twitter.com/LisPower1',
701 'comment_count': int,
702 'repost_count': int,
703 'like_count': int,
704 'tags': [],
705 'age_limit': 0,
706 '_old_archive_ids': ['twitter 1001551623938805763'],
707 },
708 'params': {
709 'skip_download': True, # requires ffmpeg
710 },
711 }, {
712 'url': 'https://twitter.com/foobar/status/1087791357756956680',
713 'info_dict': {
714 'id': '1087791272830607360',
715 'display_id': '1087791357756956680',
716 'ext': 'mp4',
717 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
718 'thumbnail': r're:^https?://.*\.jpg',
719 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
720 'uploader': 'X',
721 'uploader_id': 'X',
722 'duration': 61.567,
723 'timestamp': 1548184644,
724 'upload_date': '20190122',
725 'uploader_url': 'https://twitter.com/X',
726 'comment_count': int,
727 'repost_count': int,
728 'like_count': int,
729 'view_count': int,
730 'tags': [],
731 'age_limit': 0,
732 },
733 'skip': 'This Tweet is unavailable',
734 }, {
735 # not available in Periscope
736 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
737 'info_dict': {
738 'id': '1vOGwqejwoWxB',
739 'ext': 'mp4',
740 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
741 'uploader': 'Vivi',
742 'uploader_id': '1eVjYOLGkGrQL',
743 'thumbnail': r're:^https?://.*\.jpg',
744 'tags': ['EduTECH2019'],
745 'view_count': int,
746 },
747 'add_ie': ['TwitterBroadcast'],
748 'skip': 'Broadcast no longer exists',
749 }, {
750 # unified card
751 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
752 'info_dict': {
753 'id': '1349774757969989634',
754 'display_id': '1349794411333394432',
755 'ext': 'mp4',
756 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
757 'thumbnail': r're:^https?://.*\.jpg',
758 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
759 'channel_id': '18552281',
760 'uploader': 'Brooklyn Nets',
761 'uploader_id': 'BrooklynNets',
762 'duration': 324.484,
763 'timestamp': 1610651040,
764 'upload_date': '20210114',
765 'uploader_url': 'https://twitter.com/BrooklynNets',
766 'comment_count': int,
767 'repost_count': int,
768 'like_count': int,
769 'tags': [],
770 'age_limit': 0,
771 '_old_archive_ids': ['twitter 1349794411333394432'],
772 },
773 'params': {
774 'skip_download': True,
775 },
776 }, {
777 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
778 'info_dict': {
779 'id': '1577855447914409984',
780 'display_id': '1577855540407197696',
781 'ext': 'mp4',
782 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
783 'description': 'md5:b9c3699335447391d11753ab21c70a74',
784 'upload_date': '20221006',
785 'channel_id': '143077138',
786 'uploader': 'Oshtru',
787 'uploader_id': 'oshtru',
788 'uploader_url': 'https://twitter.com/oshtru',
789 'thumbnail': r're:^https?://.*\.jpg',
790 'duration': 30.03,
791 'timestamp': 1665025050,
792 'comment_count': int,
793 'repost_count': int,
794 'like_count': int,
795 'tags': [],
796 'age_limit': 0,
797 '_old_archive_ids': ['twitter 1577855540407197696'],
798 },
799 'params': {'skip_download': True},
800 }, {
801 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
802 'info_dict': {
803 'id': '1577719286659006464',
804 'title': 'Ultima Reload - Test',
805 'description': 'Test https://t.co/Y3KEZD7Dad',
806 'channel_id': '168922496',
807 'uploader': 'Ultima Reload',
808 'uploader_id': 'UltimaShadowX',
809 'uploader_url': 'https://twitter.com/UltimaShadowX',
810 'upload_date': '20221005',
811 'timestamp': 1664992565,
812 'comment_count': int,
813 'repost_count': int,
814 'like_count': int,
815 'tags': [],
816 'age_limit': 0,
817 },
818 'playlist_count': 4,
819 'params': {'skip_download': True},
820 }, {
821 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
822 'info_dict': {
823 'id': '1575559336759263233',
824 'display_id': '1575560063510810624',
825 'ext': 'mp4',
826 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
827 'thumbnail': r're:^https?://.*\.jpg',
828 'description': 'md5:95aea692fda36a12081b9629b02daa92',
829 'channel_id': '1094109584',
830 'uploader': 'Max Olson',
831 'uploader_id': 'MesoMax919',
832 'uploader_url': 'https://twitter.com/MesoMax919',
833 'duration': 21.321,
834 'timestamp': 1664477766,
835 'upload_date': '20220929',
836 'comment_count': int,
837 'repost_count': int,
838 'like_count': int,
839 'tags': ['HurricaneIan'],
840 'age_limit': 0,
841 '_old_archive_ids': ['twitter 1575560063510810624'],
842 },
843 }, {
844 # Adult content, fails if not logged in
845 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
846 'info_dict': {
847 'id': '1575199163847000068',
848 'display_id': '1575199173472927762',
849 'ext': 'mp4',
850 'title': str,
851 'description': str,
852 'channel_id': '1217167793541480450',
853 'uploader': str,
854 'uploader_id': 'Rizdraws',
855 'uploader_url': 'https://twitter.com/Rizdraws',
856 'upload_date': '20220928',
857 'timestamp': 1664391723,
858 'thumbnail': r're:^https?://.+\.jpg',
859 'like_count': int,
860 'repost_count': int,
861 'comment_count': int,
862 'age_limit': 18,
863 'tags': [],
864 '_old_archive_ids': ['twitter 1575199173472927762'],
865 },
866 'params': {'skip_download': 'The media could not be played'},
867 'skip': 'Requires authentication',
868 }, {
869 # Playlist result only with graphql API
870 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
871 'playlist_mincount': 2,
872 'info_dict': {
873 'id': '1395079556562706435',
874 'title': str,
875 'tags': [],
876 'channel_id': '21539378',
877 'uploader': str,
878 'like_count': int,
879 'upload_date': '20210519',
880 'age_limit': 0,
881 'repost_count': int,
882 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
883 'uploader_id': 'Srirachachau',
884 'comment_count': int,
885 'uploader_url': 'https://twitter.com/Srirachachau',
886 'timestamp': 1621447860,
887 },
888 }, {
889 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
890 'playlist_mincount': 2,
891 'info_dict': {
892 'id': '1578353380363501568',
893 'title': str,
894 'channel_id': '2195866214',
895 'uploader_id': 'DavidToons_',
896 'repost_count': int,
897 'like_count': int,
898 'uploader': str,
899 'timestamp': 1665143744,
900 'uploader_url': 'https://twitter.com/DavidToons_',
901 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
902 'tags': [],
903 'comment_count': int,
904 'upload_date': '20221007',
905 'age_limit': 0,
906 },
907 }, {
908 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
909 'playlist_count': 2,
910 'info_dict': {
911 'id': '1578401165338976258',
912 'title': str,
913 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
914 'channel_id': '19338359',
915 'uploader': str,
916 'uploader_id': 'primevideouk',
917 'timestamp': 1665155137,
918 'upload_date': '20221007',
919 'age_limit': 0,
920 'uploader_url': 'https://twitter.com/primevideouk',
921 'comment_count': int,
922 'repost_count': int,
923 'like_count': int,
924 'tags': ['TheRingsOfPower'],
925 },
926 }, {
927 # Twitter Spaces
928 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
929 'info_dict': {
930 'id': '1lPJqmBeeNAJb',
931 'ext': 'm4a',
932 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
933 'uploader': r're:Monique Camarra.+?',
934 'uploader_id': 'MoniqueCamarra',
935 'live_status': 'was_live',
936 'release_timestamp': 1658417414,
937 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
938 'timestamp': 1658407771,
939 'release_date': '20220721',
940 'upload_date': '20220721',
941 },
942 'add_ie': ['TwitterSpaces'],
943 'params': {'skip_download': 'm3u8'},
944 'skip': 'Requires authentication',
945 }, {
946 # URL specifies video number but --yes-playlist
947 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
948 'playlist_mincount': 2,
949 'info_dict': {
950 'id': '1600649710662213632',
951 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
952 'timestamp': 1670459604.0,
953 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
954 'comment_count': int,
955 'uploader_id': 'CTVJLaidlaw',
956 'channel_id': '80082014',
957 'repost_count': int,
958 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
959 'upload_date': '20221208',
960 'age_limit': 0,
961 'uploader': 'Jocelyn Laidlaw',
962 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
963 'like_count': int,
964 },
965 }, {
966 # URL specifies video number and --no-playlist
967 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
968 'info_dict': {
969 'id': '1600649511827013632',
970 'ext': 'mp4',
971 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
972 'thumbnail': r're:^https?://.+\.jpg',
973 'timestamp': 1670459604.0,
974 'channel_id': '80082014',
975 'uploader_id': 'CTVJLaidlaw',
976 'uploader': 'Jocelyn Laidlaw',
977 'repost_count': int,
978 'comment_count': int,
979 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
980 'duration': 102.226,
981 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
982 'display_id': '1600649710662213632',
983 'like_count': int,
984 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
985 'upload_date': '20221208',
986 'age_limit': 0,
987 '_old_archive_ids': ['twitter 1600649710662213632'],
988 },
989 'params': {'noplaylist': True},
990 }, {
991 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
992 # note the id different between extraction and url
993 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
994 'info_dict': {
995 'id': '1621117577354424321',
996 'display_id': '1621117700482416640',
997 'ext': 'mp4',
998 'title': '뽀 - 아 최우제 이동속도 봐',
999 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
1000 'duration': 24.598,
1001 'channel_id': '1281839411068432384',
1002 'uploader': '뽀',
1003 'uploader_id': 's2FAKER',
1004 'uploader_url': 'https://twitter.com/s2FAKER',
1005 'upload_date': '20230202',
1006 'timestamp': 1675339553.0,
1007 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1008 'age_limit': 18,
1009 'tags': [],
1010 'like_count': int,
1011 'repost_count': int,
1012 'comment_count': int,
1013 '_old_archive_ids': ['twitter 1621117700482416640'],
1014 },
1015 'skip': 'Requires authentication',
1016 }, {
1017 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1018 'info_dict': {
1019 'id': '1599108643743473680',
1020 'display_id': '1599108751385972737',
1021 'ext': 'mp4',
1022 'title': '\u06ea - \U0001F48B',
1023 'channel_id': '1347791436809441283',
1024 'uploader_url': 'https://twitter.com/hlo_again',
1025 'like_count': int,
1026 'uploader_id': 'hlo_again',
1027 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1028 'repost_count': int,
1029 'duration': 9.531,
1030 'comment_count': int,
1031 'upload_date': '20221203',
1032 'age_limit': 0,
1033 'timestamp': 1670092210.0,
1034 'tags': [],
1035 'uploader': '\u06ea',
1036 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1037 '_old_archive_ids': ['twitter 1599108751385972737'],
1038 },
1039 'params': {'noplaylist': True},
1040 }, {
1041 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1042 'info_dict': {
1043 'id': '1600009362759733248',
1044 'display_id': '1600009574919962625',
1045 'ext': 'mp4',
1046 'channel_id': '211814412',
1047 'uploader_url': 'https://twitter.com/MunTheShinobi',
1048 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1049 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1050 'age_limit': 0,
1051 'uploader': 'Mün',
1052 'repost_count': int,
1053 'upload_date': '20221206',
1054 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1055 'comment_count': int,
1056 'like_count': int,
1057 'tags': [],
1058 'uploader_id': 'MunTheShinobi',
1059 'duration': 139.987,
1060 'timestamp': 1670306984.0,
1061 '_old_archive_ids': ['twitter 1600009574919962625'],
1062 },
1063 }, {
1064 # retweeted_status (private)
1065 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1066 'info_dict': {
1067 'id': '1623274794488659969',
1068 'display_id': '1623739803874349067',
1069 'ext': 'mp4',
1070 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1071 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1072 'uploader': 'Johnny Bullets',
1073 'uploader_id': 'Johnnybull3ts',
1074 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1075 'age_limit': 0,
1076 'tags': [],
1077 'duration': 8.033,
1078 'timestamp': 1675853859.0,
1079 'upload_date': '20230208',
1080 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1081 'like_count': int,
1082 'repost_count': int,
1083 },
1084 'skip': 'Protected tweet',
1085 }, {
1086 # retweeted_status
1087 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1088 'info_dict': {
1089 'id': '1694928337846538240',
1090 'ext': 'mp4',
1091 'display_id': '1695424220702888009',
1092 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1093 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1094 'channel_id': '15212187',
1095 'uploader': 'Benny Johnson',
1096 'uploader_id': 'bennyjohnson',
1097 'uploader_url': 'https://twitter.com/bennyjohnson',
1098 'age_limit': 0,
1099 'tags': [],
1100 'duration': 45.001,
1101 'timestamp': 1692962814.0,
1102 'upload_date': '20230825',
1103 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1104 'like_count': int,
1105 'repost_count': int,
1106 'comment_count': int,
1107 '_old_archive_ids': ['twitter 1695424220702888009'],
1108 },
1109 }, {
1110 # retweeted_status w/ legacy API
1111 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1112 'info_dict': {
1113 'id': '1694928337846538240',
1114 'ext': 'mp4',
1115 'display_id': '1695424220702888009',
1116 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1117 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1118 'channel_id': '15212187',
1119 'uploader': 'Benny Johnson',
1120 'uploader_id': 'bennyjohnson',
1121 'uploader_url': 'https://twitter.com/bennyjohnson',
1122 'age_limit': 0,
1123 'tags': [],
1124 'duration': 45.001,
1125 'timestamp': 1692962814.0,
1126 'upload_date': '20230825',
1127 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1128 'like_count': int,
1129 'repost_count': int,
1130 '_old_archive_ids': ['twitter 1695424220702888009'],
1131 },
1132 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1133 }, {
1134 # Broadcast embedded in tweet
1135 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1136 'info_dict': {
1137 'id': '1rmxPMjLzAXKN',
1138 'ext': 'mp4',
1139 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1140 'uploader': 'Jessica Dobson',
1141 'uploader_id': 'JessicaDobsonWX',
1142 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1143 'timestamp': 1701566398,
1144 'upload_date': '20231203',
1145 'live_status': 'was_live',
1146 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1147 'concurrent_view_count': int,
1148 'view_count': int,
1149 },
1150 'add_ie': ['TwitterBroadcast'],
1151 }, {
1152 # Animated gif and quote tweet video
1153 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1154 'playlist_mincount': 2,
1155 'info_dict': {
1156 'id': '1696256659889565950',
1157 'title': 'BAKOON - https://t.co/zom968d0a0',
1158 'description': 'https://t.co/zom968d0a0',
1159 'tags': [],
1160 'channel_id': '1263540390',
1161 'uploader': 'BAKOON',
1162 'uploader_id': 'BAKKOOONN',
1163 'uploader_url': 'https://twitter.com/BAKKOOONN',
1164 'age_limit': 18,
1165 'timestamp': 1693254077.0,
1166 'upload_date': '20230828',
1167 'like_count': int,
1168 'comment_count': int,
1169 'repost_count': int,
1170 },
1171 'skip': 'Requires authentication',
1172 }, {
1173 # "stale tweet" with typename "TweetWithVisibilityResults"
1174 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1175 'md5': '511377ff8dfa7545307084dca4dce319',
1176 'info_dict': {
1177 'id': '1724883339285544960',
1178 'ext': 'mp4',
1179 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1180 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1181 'display_id': '1724884212803834154',
1182 'channel_id': '337808606',
1183 'uploader': 'Robert F. Kennedy Jr',
1184 'uploader_id': 'RobertKennedyJr',
1185 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1186 'upload_date': '20231115',
1187 'timestamp': 1700079417.0,
1188 'duration': 341.048,
1189 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1190 'tags': ['Kennedy24'],
1191 'repost_count': int,
1192 'like_count': int,
1193 'comment_count': int,
1194 'age_limit': 0,
1195 '_old_archive_ids': ['twitter 1724884212803834154'],
1196 },
1197 }, {
1198 # x.com
1199 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1200 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1201 'info_dict': {
1202 'id': '1790637589910654976',
1203 'ext': 'mp4',
1204 'title': 'Historic Vids - One of the most intense moments in history',
1205 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1206 'display_id': '1790637656616943991',
1207 'uploader': 'Historic Vids',
1208 'uploader_id': 'historyinmemes',
1209 'uploader_url': 'https://twitter.com/historyinmemes',
1210 'channel_id': '855481986290524160',
1211 'upload_date': '20240515',
1212 'timestamp': 1715756260.0,
1213 'duration': 15.488,
1214 'tags': [],
1215 'comment_count': int,
1216 'repost_count': int,
1217 'like_count': int,
1218 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1219 'age_limit': 0,
1220 '_old_archive_ids': ['twitter 1790637656616943991'],
1221 },
1222 }, {
1223 # onion route
1224 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1225 'only_matching': True,
1226 }, {
1227 # Twitch Clip Embed
1228 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1229 'only_matching': True,
1230 }, {
1231 # promo_video_website card
1232 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1233 'only_matching': True,
1234 }, {
1235 # promo_video_convo card
1236 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1237 'only_matching': True,
1238 }, {
1239 # appplayer card
1240 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1241 'only_matching': True,
1242 }, {
1243 # video_direct_message card
1244 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1245 'only_matching': True,
1246 }, {
1247 # poll2choice_video card
1248 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1249 'only_matching': True,
1250 }, {
1251 # poll3choice_video card
1252 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1253 'only_matching': True,
1254 }, {
1255 # poll4choice_video card
1256 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1257 'only_matching': True,
1258 }]
1259
# Captures the run of digits between '_video/' and the following '/' in a URL;
# _call_syndication_api() applies it to video variant URLs to derive a media 'id_str'
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1261
1262 @property
1263 def _GRAPHQL_ENDPOINT(self):
1264 if self.is_logged_in:
1265 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1266 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1267
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-style status dict.

    `data` is the GraphQL payload and `twid` the ID of the requested tweet.
    The returned dict is the result's 'legacy' object, augmented with 'user',
    'card', 'quoted_status' and 'retweeted_status' entries where available.

    Raises ExtractorError (or the error raised by `raise_login_required`) when
    the API reports a tombstone or marks the tweet as unavailable.
    """
    if self.is_logged_in:
        # Pick the conversation entry whose entryId belongs to the requested tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    if typename == 'TweetWithVisibilityResults':
        # Result for "stale tweet" needs additional transformation: the real tweet is nested
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    related_objects = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(related_objects)

    # extra transformations needed since result does not match legacy format
    retweeted_status = status.get('retweeted_status')
    if retweeted_status:
        retweeted_status['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Turn the card's list of {'key': ..., 'value': ...} items into a key -> value mapping
    binding_values = {}
    for item in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[item.get('key')] = item.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1315
1316 def _build_graphql_query(self, media_id):
1317 return {
1318 'variables': {
1319 'focalTweetId': media_id,
1320 'includePromotedContent': True,
1321 'with_rux_injections': False,
1322 'withBirdwatchNotes': True,
1323 'withCommunity': True,
1324 'withDownvotePerspective': False,
1325 'withQuickPromoteEligibilityTweetFields': True,
1326 'withReactionsMetadata': False,
1327 'withReactionsPerspective': False,
1328 'withSuperFollowsTweetFields': True,
1329 'withSuperFollowsUserFields': True,
1330 'withV2Timeline': True,
1331 'withVoice': True,
1332 },
1333 'features': {
1334 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1335 'interactive_text_enabled': True,
1336 'responsive_web_edit_tweet_api_enabled': True,
1337 'responsive_web_enhance_cards_enabled': True,
1338 'responsive_web_graphql_timeline_navigation_enabled': False,
1339 'responsive_web_text_conversations_enabled': False,
1340 'responsive_web_uc_gql_enabled': True,
1341 'standardized_nudges_misinfo': True,
1342 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1343 'tweetypie_unmention_optimization_enabled': True,
1344 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1345 'verified_phone_label_enabled': False,
1346 'vibe_api_enabled': True,
1347 },
1348 } if self.is_logged_in else {
1349 'variables': {
1350 'tweetId': media_id,
1351 'withCommunity': False,
1352 'includePromotedContent': False,
1353 'withVoice': False,
1354 },
1355 'features': {
1356 'creator_subscriptions_tweet_preview_api_enabled': True,
1357 'tweetypie_unmention_optimization_enabled': True,
1358 'responsive_web_edit_tweet_api_enabled': True,
1359 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1360 'view_counts_everywhere_api_enabled': True,
1361 'longform_notetweets_consumption_enabled': True,
1362 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1363 'tweet_awards_web_tipping_enabled': False,
1364 'freedom_of_speech_not_reach_fetch_enabled': True,
1365 'standardized_nudges_misinfo': True,
1366 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1367 'longform_notetweets_rich_text_read_enabled': True,
1368 'longform_notetweets_inline_media_enabled': True,
1369 'responsive_web_graphql_exclude_directive_enabled': True,
1370 'verified_phone_label_enabled': False,
1371 'responsive_web_media_download_video_enabled': False,
1372 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1373 'responsive_web_graphql_timeline_navigation_enabled': True,
1374 'responsive_web_enhance_cards_enabled': False,
1375 },
1376 'fieldToggles': {
1377 'withArticleRichContentState': False,
1378 },
1379 }
1380
def _call_syndication_api(self, twid):
    """Fetch tweet `twid` from the syndication endpoint in a legacy-like shape.

    The downloaded JSON is returned with an added 'extended_entities' entry
    listing the media details of the tweet and of its quoted tweet, so that
    its structure matches that of the legacy/graphql results.

    Raises ExtractorError when the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    def with_media_id(detail):
        # Use the ID found in a video variant URL, falling back to the tweet ID
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        return detail

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    status['extended_entities'] = {'media': [with_media_id(detail) for detail in media_details]}

    return status
1402
def _extract_status(self, twid):
    """Fetch the status dict for tweet `twid` through the selected API.

    GraphQL is used when logged in or when 'graphql' is selected, the legacy
    endpoint when 'legacy' is selected. An HTTP 429 from either falls back to
    the syndication endpoint, and a 'syndication' selection always ends with
    a syndication call. The 'retweeted_status' dict is preferred over the
    status itself when present.

    Raises ExtractorError for an unknown API selection; errors other than
    HTTP 429 from the API call are re-raised unchanged.
    """
    api = self._selected_api
    if api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as error:
        rate_limited = isinstance(error.cause, HTTPError) and error.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1428
def _real_extract(self, url):
    """Extract the video(s) attached to, quoted by, or embedded via a card in a tweet.

    Depending on what is found, this returns:
    - a single info dict, when one media index is selected from the URL or
      when exactly one entry is found;
    - a playlist result, when several entries are found;
    - a URL result for the tweet's first expanded link, when the tweet holds
      no media of its own;
    - the bare tweet metadata, after reporting that no video could be found.

    Raises ExtractorError when the media selected by the URL index does not
    exist or is not a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    # The first of 'full_text'/'text' that is present, with newlines flattened to spaces
    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Tweet-level metadata shared by every entry extracted below
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the media-specific part of an info dict from one media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # One thumbnail per named size; the size name is passed as the 'name' query parameter
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            # 'duration_millis' is scaled down by 1000
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield the entries described by the tweet's card; yields nothing without a card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # The payload of a binding value is stored under '<lowercased type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        # Only the part of the card name after the last ':' decides the handling
        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            # The unified card is a JSON document whose media entities are handled like tweet media
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                # Skip missing images and placeholder images
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself followed by that of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The index taken from the URL is 1-based and counts all media, photos included
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    # Entry data overrides the tweet-level fields (notably 'id'); the tweet ID is kept as display_id
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media found: fall back to the first link of the tweet, unless it is the input URL itself
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    # The first entry also records the archive ID built from the tweet ID
    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    # Number the titles so the entries of the playlist can be told apart
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1612
1613
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for standalone video pages hosted on amp.twimg.com
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract the video referenced by the page's 'twitter:amplify:vmap' meta tag.

        Formats come from the VMAP document; the thumbnail and the player
        dimensions are read from the page's 'twitter:*' meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the 'twitter:<target>:width' / ':height' meta tags as integers (None if unusable)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list: indexing it would raise an
        # IndexError here instead of letting the result be returned without formats
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1669
1670
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for /i/broadcasts/<id> URLs
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a broadcast: metadata from 'broadcasts/show.json', formats from its stream.

        Upcoming broadcasts are returned with metadata only. Raises
        ExtractorError when the API holds no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api('broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast object take precedence over the parsed defaults
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        stream_source = stream_status['source']
        playlist_url = stream_source.get('noRedirectPlaybackUrl') or stream_source['location']
        if '/live_video_stream/geoblocked/' in playlist_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL (if any) serves as the m3u8 format ID
        playlist_query = urllib.parse.parse_qs(urllib.parse.urlparse(playlist_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            playlist_url, broadcast_id, m3u8_id, state, width, height)
        return info
1746
1747
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for Twitter Spaces URLs of the form <base>/i/spaces/<13-character id>
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased `state` value of a Space's metadata -> value reported as 'live_status'
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` payload describing the Space `space_id`.

        NOTE(review): presumably consumed by `_call_graphql_api`, which is
        defined outside this class - confirm against the base class.
        """
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Extract metadata and (when available) audio formats for one Space.

        Requires a logged-in session. Raises ExtractorError (expected) when
        the GraphQL response carries no `audioSpace` data; reports "no
        formats" for Spaces that have not started, whose replay is disabled,
        or that timed out without yielding any playable format.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']

        def lookup_live_status():
            return self.SPACE_STATUS[metadata['state'].lower()]

        def scheduled_start_seconds():
            return int_or_none(metadata['scheduled_start'], scale=1000)

        # try_call yields None when the state is missing or not in SPACE_STATUS
        live_status = try_call(lookup_live_status)
        is_live = live_status == 'is_live'

        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate keys, in this order
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            # Every format is tagged as audio-only AAC; finished Spaces also get a container hint
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # Both timestamps are divided by 1000 before being reported
            'release_timestamp': try_call(scheduled_start_seconds),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1886
1887
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (and the internal `tco:<code>` form) to their target URL
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'
    # Prefix of Twitter's unsafe-link warning page; the actual destination
    # is whatever follows it in the redirected URL
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to another extractor.

        `url` is either a `https://t.co/<code>` link or the `tco:<code>`
        form; the latter is first rewritten to a real t.co URL. When the
        redirect ends on the unsafe-link warning page, only the leading
        warning prefix is removed so the wrapped destination is returned.
        """
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Slice off just the leading prefix: str.replace() would also delete any
            # later occurrence of the same text inside the destination URL itself.
            # NOTE(review): if the `unsafe_link` value is percent-encoded it would
            # additionally need unquoting - confirm against a live response.
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)