]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import functools
2 import json
3 import random
4 import re
5
6 from .common import InfoExtractor
7 from .periscope import PeriscopeBaseIE, PeriscopeIE
8 from ..compat import (
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..networking.exceptions import HTTPError
14 from ..utils import (
15 ExtractorError,
16 dict_get,
17 filter_dict,
18 float_or_none,
19 format_field,
20 int_or_none,
21 make_archive_id,
22 remove_end,
23 str_or_none,
24 strip_or_none,
25 traverse_obj,
26 try_call,
27 try_get,
28 unified_timestamp,
29 update_url_query,
30 url_or_none,
31 xpath_text,
32 )
33
34
class TwitterBaseIE(InfoExtractor):
    """Shared constants and helper methods for the Twitter/X extractors in this file."""
    # NOTE(review): presumably the machine name the base class uses for netrc
    # credential lookup -- the lookup itself is not visible in this file
    _NETRC_MACHINE = 'twitter'
    # Base URLs for the REST (1.1) and GraphQL APIs; _real_initialize swaps both
    # to their twitter.com equivalents when only twitter.com cookies carry an auth_token
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    # Scheme + host prefix (twitter.com / x.com with optional www., m. or mobile.
    # subdomain, or the onion host); extractors append their own path pattern to it
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens for the Authorization header; _set_base_headers picks
    # _LEGACY_AUTH only for legacy requests made while not logged in
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Flow token from the most recent login API response (stored by _call_login_api)
    _flow_token = None

    # Request body for the first login call (the one made with flow_name=login),
    # serialized once to compact JSON bytes
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()
97
98 def _extract_variant_formats(self, variant, video_id):
99 variant_url = variant.get('url')
100 if not variant_url:
101 return [], {}
102 elif '.m3u8' in variant_url:
103 fmts, subs = self._extract_m3u8_formats_and_subtitles(
104 variant_url, video_id, 'mp4', 'm3u8_native',
105 m3u8_id='hls', fatal=False)
106 for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
107 if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
108 f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
109 return fmts, subs
110 else:
111 tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
112 f = {
113 'url': variant_url,
114 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
115 'tbr': tbr,
116 }
117 self._search_dimensions_in_video_url(f, variant_url)
118 return [f], {}
119
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
    """Download a VMAP document and collect formats/subtitles from it.

    Returns ``([], {})`` when ``vmap_url`` does not pass ``url_or_none``.
    Every ``videoVariant`` element contributes its (URL-unquoted) variant;
    the ``MediaFile`` URL is added as well unless a variant already had it.
    """
    vmap_url = url_or_none(vmap_url)
    if not vmap_url:
        return [], {}

    vmap_data = self._download_xml(vmap_url, video_id)
    formats, subtitles, seen_urls = [], {}, []

    def collect(variant):
        # Extract one variant and fold its results into the running totals
        nonlocal subtitles
        fmts, subs = self._extract_variant_formats(variant, video_id)
        formats.extend(fmts)
        subtitles = self._merge_subtitles(subtitles, subs)

    variant_path = './/{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'
    for node in vmap_data.findall(variant_path):
        attrs = node.attrib
        # The unquoted URL is written back onto the element's attributes
        attrs['url'] = compat_urllib_parse_unquote(attrs['url'])
        seen_urls.append(attrs['url'])
        collect(attrs)

    media_file_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
    if media_file_url not in seen_urls:
        collect({'url': media_file_url})
    return formats, subtitles
142
143 @staticmethod
144 def _search_dimensions_in_video_url(a_format, video_url):
145 m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
146 if m:
147 a_format.update({
148 'width': int(m.group('width')),
149 'height': int(m.group('height')),
150 })
151
@property
def is_logged_in(self):
    """True when the cookies for ``_API_BASE`` include a non-empty 'auth_token'."""
    api_cookies = self._get_cookies(self._API_BASE)
    return bool(api_cookies.get('auth_token'))
155
156 # XXX: Temporary workaround until twitter.com => x.com migration is completed
157 def _real_initialize(self):
158 if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
159 return
160 # User has not yet been migrated to x.com and has passed twitter.com cookies
161 TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
162 TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
163
164 @functools.cached_property
165 def _selected_api(self):
166 return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]
167
def _fetch_guest_token(self, display_id):
    """Request a guest token from ``guest/activate.json`` and return it.

    Legacy headers are requested only when ``display_id`` is truthy and the
    selected API is 'legacy'. Raises ExtractorError if the response has no
    string 'guest_token'.
    """
    activate_url = f'{self._API_BASE}guest/activate.json'
    use_legacy = display_id and self._selected_api == 'legacy'
    response = self._download_json(
        activate_url, display_id, 'Downloading guest token', data=b'',
        headers=self._set_base_headers(legacy=use_legacy))

    guest_token = traverse_obj(response, ('guest_token', {str}))
    if guest_token:
        return guest_token
    raise ExtractorError('Could not retrieve guest token')
176
def _set_base_headers(self, legacy=False):
    """Build the Authorization / x-csrf-token headers for an API request.

    The legacy bearer token is used only when ``legacy`` is truthy and no
    login cookie is present. The CSRF header takes the value of the 'ct0'
    cookie for ``_API_BASE``; the result is passed through ``filter_dict``.
    """
    if legacy and not self.is_logged_in:
        bearer_token = self._LEGACY_AUTH
    else:
        bearer_token = self._AUTH
    csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
    return filter_dict({
        'Authorization': f'Bearer {bearer_token}',
        'x-csrf-token': csrf_token,
    })
183
def _call_login_api(self, note, headers, query={}, data=None):
    """Send one step of the login flow to ``onboarding/task.json``.

    HTTP 400 responses are accepted so that the API's own error message can
    be surfaced. On success the response's flow token is stored on
    ``self._flow_token`` and the ID of the next subtask is returned.

    Raises ExtractorError when the API reports an error (marked expected),
    when the status is not 'success', or when no next subtask is given.
    """
    response = self._download_json(
        f'{self._API_BASE}onboarding/task.json', None, note,
        headers=headers, query=query, data=data, expected_status=400)

    api_error = traverse_obj(response, ('errors', 0, 'message', {str}))
    if api_error:
        raise ExtractorError(f'Login failed, Twitter API says: {api_error}', expected=True)
    if traverse_obj(response, 'status') != 'success':
        raise ExtractorError('Login was unsuccessful')

    next_subtask = traverse_obj(
        response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
    if not next_subtask:
        raise ExtractorError('Twitter API did not return next login subtask')

    # Needed by the following step's request body
    self._flow_token = response['flow_token']
    return next_subtask
202
203 def _perform_login(self, username, password):
204 if self.is_logged_in:
205 return
206
207 guest_token = self._fetch_guest_token(None)
208 headers = {
209 **self._set_base_headers(),
210 'content-type': 'application/json',
211 'x-guest-token': guest_token,
212 'x-twitter-client-language': 'en',
213 'x-twitter-active-user': 'yes',
214 'Referer': 'https://x.com/',
215 'Origin': 'https://x.com',
216 }
217
218 def build_login_json(*subtask_inputs):
219 return json.dumps({
220 'flow_token': self._flow_token,
221 'subtask_inputs': subtask_inputs
222 }, separators=(',', ':')).encode()
223
224 def input_dict(subtask_id, text):
225 return {
226 'subtask_id': subtask_id,
227 'enter_text': {
228 'text': text,
229 'link': 'next_link'
230 }
231 }
232
233 next_subtask = self._call_login_api(
234 'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
235
236 while not self.is_logged_in:
237 if next_subtask == 'LoginJsInstrumentationSubtask':
238 next_subtask = self._call_login_api(
239 'Submitting JS instrumentation response', headers, data=build_login_json({
240 'subtask_id': next_subtask,
241 'js_instrumentation': {
242 'response': '{}',
243 'link': 'next_link'
244 }
245 }))
246
247 elif next_subtask == 'LoginEnterUserIdentifierSSO':
248 next_subtask = self._call_login_api(
249 'Submitting username', headers, data=build_login_json({
250 'subtask_id': next_subtask,
251 'settings_list': {
252 'setting_responses': [{
253 'key': 'user_identifier',
254 'response_data': {
255 'text_data': {
256 'result': username
257 }
258 }
259 }],
260 'link': 'next_link'
261 }
262 }))
263
264 elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
265 next_subtask = self._call_login_api(
266 'Submitting alternate identifier', headers,
267 data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
268 'one of username, phone number or email that was not used as --username'))))
269
270 elif next_subtask == 'LoginEnterPassword':
271 next_subtask = self._call_login_api(
272 'Submitting password', headers, data=build_login_json({
273 'subtask_id': next_subtask,
274 'enter_password': {
275 'password': password,
276 'link': 'next_link'
277 }
278 }))
279
280 elif next_subtask == 'AccountDuplicationCheck':
281 next_subtask = self._call_login_api(
282 'Submitting account duplication check', headers, data=build_login_json({
283 'subtask_id': next_subtask,
284 'check_logged_in_account': {
285 'link': 'AccountDuplicationCheck_false'
286 }
287 }))
288
289 elif next_subtask == 'LoginTwoFactorAuthChallenge':
290 next_subtask = self._call_login_api(
291 'Submitting 2FA token', headers, data=build_login_json(input_dict(
292 next_subtask, self._get_tfa_info('two-factor authentication token'))))
293
294 elif next_subtask == 'LoginAcid':
295 next_subtask = self._call_login_api(
296 'Submitting confirmation code', headers, data=build_login_json(input_dict(
297 next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))
298
299 elif next_subtask == 'ArkoseLogin':
300 self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')
301
302 elif next_subtask == 'DenyLoginSubtask':
303 self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')
304
305 elif next_subtask == 'LoginSuccessSubtask':
306 raise ExtractorError('Twitter API did not grant auth token cookie')
307
308 else:
309 raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')
310
311 self.report_login()
312
313 def _call_api(self, path, video_id, query={}, graphql=False):
314 headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
315 headers.update({
316 'x-twitter-auth-type': 'OAuth2Session',
317 'x-twitter-client-language': 'en',
318 'x-twitter-active-user': 'yes',
319 } if self.is_logged_in else {
320 'x-guest-token': self._fetch_guest_token(video_id)
321 })
322 allowed_status = {400, 401, 403, 404} if graphql else {403}
323 result = self._download_json(
324 (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
325 video_id, headers=headers, query=query, expected_status=allowed_status,
326 note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
327
328 if result.get('errors'):
329 errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
330 if errors and 'not authorized' in errors:
331 self.raise_login_required(remove_end(errors, '.'))
332 raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')
333
334 return result
335
336 def _build_graphql_query(self, media_id):
337 raise NotImplementedError('Method must be implemented to support GraphQL')
338
def _call_graphql_api(self, endpoint, media_id):
    """Call a GraphQL endpoint and return the 'data' part of its response.

    Each value of the mapping built by ``_build_graphql_query`` is encoded
    as compact JSON and sent as a query parameter under the same key.
    """
    query = {}
    for key, value in self._build_graphql_query(media_id).items():
        query[key] = json.dumps(value, separators=(',', ':'))
    response = self._call_api(endpoint, media_id, query=query, graphql=True)
    return traverse_obj(response, 'data')
343
344
class TwitterCardIE(InfoExtractor):
    """Extractor for card / embedded-video URLs (``i/cards/tfw/v1/<id>``, ``i/videos[/tweet]/<id>``)."""
    IE_NAME = 'twitter:card'
    # Shared host prefix followed by the card or video path and a numeric ID
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            # Card whose media is a YouTube video (see 'add_ie')
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            # Card whose media is a Vine (see 'add_ie')
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            # i/videos/tweet/<id> URL form
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            # i/videos/<id> URL form, without the 'tweet' path segment
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]
463
def _real_extract(self, url):
    """Hand the status ID matched from ``url`` over to TwitterIE as a URL result."""
    tweet_id = self._match_id(url)
    status_url = 'https://twitter.com/statuses/' + tweet_id
    return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
469
470
471 class TwitterIE(TwitterBaseIE):
472 IE_NAME = 'twitter'
473 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
474
475 _TESTS = [{
476 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
477 'info_dict': {
478 'id': '643211870443208704',
479 'display_id': '643211948184596480',
480 'ext': 'mp4',
481 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
482 'thumbnail': r're:^https?://.*\.jpg',
483 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
484 'channel_id': '549749560',
485 'uploader': 'FREE THE NIPPLE',
486 'uploader_id': 'freethenipple',
487 'duration': 12.922,
488 'timestamp': 1442188653,
489 'upload_date': '20150913',
490 'uploader_url': 'https://twitter.com/freethenipple',
491 'comment_count': int,
492 'repost_count': int,
493 'like_count': int,
494 'tags': [],
495 'age_limit': 18,
496 '_old_archive_ids': ['twitter 643211948184596480'],
497 },
498 'skip': 'Requires authentication',
499 }, {
500 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
501 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
502 'info_dict': {
503 'id': '657991469417025536',
504 'ext': 'mp4',
505 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
506 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
507 'thumbnail': r're:^https?://.*\.png',
508 'uploader': 'Gifs',
509 'uploader_id': 'giphz',
510 },
511 'expected_warnings': ['height', 'width'],
512 'skip': 'Account suspended',
513 }, {
514 'url': 'https://twitter.com/starwars/status/665052190608723968',
515 'info_dict': {
516 'id': '665052190608723968',
517 'display_id': '665052190608723968',
518 'ext': 'mp4',
519 'title': r're:Star Wars.*A new beginning is coming December 18.*',
520 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
521 'channel_id': '20106852',
522 'uploader_id': 'starwars',
523 'uploader': r're:Star Wars.*',
524 'timestamp': 1447395772,
525 'upload_date': '20151113',
526 'uploader_url': 'https://twitter.com/starwars',
527 'comment_count': int,
528 'repost_count': int,
529 'like_count': int,
530 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
531 'age_limit': 0,
532 '_old_archive_ids': ['twitter 665052190608723968'],
533 },
534 }, {
535 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
536 'info_dict': {
537 'id': '705235433198714880',
538 'ext': 'mp4',
539 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
540 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
541 'uploader_id': 'BTNBrentYarina',
542 'uploader': 'Brent Yarina',
543 'timestamp': 1456976204,
544 'upload_date': '20160303',
545 'uploader_url': 'https://twitter.com/BTNBrentYarina',
546 'comment_count': int,
547 'repost_count': int,
548 'like_count': int,
549 'tags': [],
550 'age_limit': 0,
551 },
552 'params': {
553 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
554 # Test case of TwitterCardIE
555 'skip_download': True,
556 },
557 'skip': 'Dead external link',
558 }, {
559 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
560 'info_dict': {
561 'id': '700207414000242688',
562 'display_id': '700207533655363584',
563 'ext': 'mp4',
564 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
565 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
566 'thumbnail': r're:^https?://.*\.jpg',
567 'channel_id': '1383165541',
568 'uploader': 'jaydin donte geer',
569 'uploader_id': 'jaydingeer',
570 'duration': 30.0,
571 'timestamp': 1455777459,
572 'upload_date': '20160218',
573 'uploader_url': 'https://twitter.com/jaydingeer',
574 'comment_count': int,
575 'repost_count': int,
576 'like_count': int,
577 'tags': ['Damndaniel'],
578 'age_limit': 0,
579 '_old_archive_ids': ['twitter 700207533655363584'],
580 },
581 }, {
582 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
583 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
584 'info_dict': {
585 'id': 'MIOxnrUteUd',
586 'ext': 'mp4',
587 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
588 'uploader': 'TAKUMA',
589 'uploader_id': '1004126642786242560',
590 'timestamp': 1402826626,
591 'upload_date': '20140615',
592 'thumbnail': r're:^https?://.*\.jpg',
593 'alt_title': 'Vine by TAKUMA',
594 'comment_count': int,
595 'repost_count': int,
596 'like_count': int,
597 'view_count': int,
598 },
599 'add_ie': ['Vine'],
600 }, {
601 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
602 'info_dict': {
603 'id': '717462543795523584',
604 'display_id': '719944021058060289',
605 'ext': 'mp4',
606 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
607 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
608 'channel_id': '701615052',
609 'uploader_id': 'CaptainAmerica',
610 'uploader': 'Captain America',
611 'duration': 3.17,
612 'timestamp': 1460483005,
613 'upload_date': '20160412',
614 'uploader_url': 'https://twitter.com/CaptainAmerica',
615 'thumbnail': r're:^https?://.*\.jpg',
616 'comment_count': int,
617 'repost_count': int,
618 'like_count': int,
619 'tags': [],
620 'age_limit': 0,
621 '_old_archive_ids': ['twitter 719944021058060289'],
622 },
623 }, {
624 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
625 'info_dict': {
626 'id': '1zqKVVlkqLaKB',
627 'ext': 'mp4',
628 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
629 'upload_date': '20160923',
630 'uploader_id': '1PmKqpJdOJQoY',
631 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
632 'timestamp': 1474613214,
633 'thumbnail': r're:^https?://.*\.jpg',
634 },
635 'add_ie': ['Periscope'],
636 'skip': 'Broadcast not found',
637 }, {
638 # has mp4 formats via mobile API
639 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
640 'info_dict': {
641 'id': '852077943283097602',
642 'ext': 'mp4',
643 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
644 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
645 'channel_id': '2526757026',
646 'uploader': 'عالم الأخبار',
647 'uploader_id': 'news_al3alm',
648 'duration': 277.4,
649 'timestamp': 1492000653,
650 'upload_date': '20170412',
651 'display_id': '852138619213144067',
652 'age_limit': 0,
653 'uploader_url': 'https://twitter.com/news_al3alm',
654 'thumbnail': r're:^https?://.*\.jpg',
655 'tags': [],
656 'repost_count': int,
657 'like_count': int,
658 'comment_count': int,
659 '_old_archive_ids': ['twitter 852138619213144067'],
660 },
661 }, {
662 'url': 'https://twitter.com/i/web/status/910031516746514432',
663 'info_dict': {
664 'id': '910030238373089285',
665 'display_id': '910031516746514432',
666 'ext': 'mp4',
667 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
668 'thumbnail': r're:^https?://.*\.jpg',
669 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
670 'channel_id': '2319432498',
671 'uploader': 'Préfet de Guadeloupe',
672 'uploader_id': 'Prefet971',
673 'duration': 47.48,
674 'timestamp': 1505803395,
675 'upload_date': '20170919',
676 'uploader_url': 'https://twitter.com/Prefet971',
677 'comment_count': int,
678 'repost_count': int,
679 'like_count': int,
680 'tags': ['Maria'],
681 'age_limit': 0,
682 '_old_archive_ids': ['twitter 910031516746514432'],
683 },
684 'params': {
685 'skip_download': True, # requires ffmpeg
686 },
687 }, {
688 # card via api.twitter.com/1.1/videos/tweet/config
689 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
690 'info_dict': {
691 'id': '1001551417340022785',
692 'display_id': '1001551623938805763',
693 'ext': 'mp4',
694 'title': 're:.*?Shep is on a roll today.*?',
695 'thumbnail': r're:^https?://.*\.jpg',
696 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
697 'channel_id': '255036353',
698 'uploader': 'Lis Power',
699 'uploader_id': 'LisPower1',
700 'duration': 111.278,
701 'timestamp': 1527623489,
702 'upload_date': '20180529',
703 'uploader_url': 'https://twitter.com/LisPower1',
704 'comment_count': int,
705 'repost_count': int,
706 'like_count': int,
707 'tags': [],
708 'age_limit': 0,
709 '_old_archive_ids': ['twitter 1001551623938805763'],
710 },
711 'params': {
712 'skip_download': True, # requires ffmpeg
713 },
714 }, {
715 'url': 'https://twitter.com/foobar/status/1087791357756956680',
716 'info_dict': {
717 'id': '1087791272830607360',
718 'display_id': '1087791357756956680',
719 'ext': 'mp4',
720 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
721 'thumbnail': r're:^https?://.*\.jpg',
722 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
723 'uploader': 'X',
724 'uploader_id': 'X',
725 'duration': 61.567,
726 'timestamp': 1548184644,
727 'upload_date': '20190122',
728 'uploader_url': 'https://twitter.com/X',
729 'comment_count': int,
730 'repost_count': int,
731 'like_count': int,
732 'view_count': int,
733 'tags': [],
734 'age_limit': 0,
735 },
736 'skip': 'This Tweet is unavailable',
737 }, {
738 # not available in Periscope
739 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
740 'info_dict': {
741 'id': '1vOGwqejwoWxB',
742 'ext': 'mp4',
743 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
744 'uploader': 'Vivi',
745 'uploader_id': '1eVjYOLGkGrQL',
746 'thumbnail': r're:^https?://.*\.jpg',
747 'tags': ['EduTECH2019'],
748 'view_count': int,
749 },
750 'add_ie': ['TwitterBroadcast'],
751 'skip': 'Broadcast no longer exists',
752 }, {
753 # unified card
754 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
755 'info_dict': {
756 'id': '1349774757969989634',
757 'display_id': '1349794411333394432',
758 'ext': 'mp4',
759 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
760 'thumbnail': r're:^https?://.*\.jpg',
761 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
762 'channel_id': '18552281',
763 'uploader': 'Brooklyn Nets',
764 'uploader_id': 'BrooklynNets',
765 'duration': 324.484,
766 'timestamp': 1610651040,
767 'upload_date': '20210114',
768 'uploader_url': 'https://twitter.com/BrooklynNets',
769 'comment_count': int,
770 'repost_count': int,
771 'like_count': int,
772 'tags': [],
773 'age_limit': 0,
774 '_old_archive_ids': ['twitter 1349794411333394432'],
775 },
776 'params': {
777 'skip_download': True,
778 },
779 }, {
780 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
781 'info_dict': {
782 'id': '1577855447914409984',
783 'display_id': '1577855540407197696',
784 'ext': 'mp4',
785 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
786 'description': 'md5:b9c3699335447391d11753ab21c70a74',
787 'upload_date': '20221006',
788 'channel_id': '143077138',
789 'uploader': 'Oshtru',
790 'uploader_id': 'oshtru',
791 'uploader_url': 'https://twitter.com/oshtru',
792 'thumbnail': r're:^https?://.*\.jpg',
793 'duration': 30.03,
794 'timestamp': 1665025050,
795 'comment_count': int,
796 'repost_count': int,
797 'like_count': int,
798 'tags': [],
799 'age_limit': 0,
800 '_old_archive_ids': ['twitter 1577855540407197696'],
801 },
802 'params': {'skip_download': True},
803 }, {
804 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
805 'info_dict': {
806 'id': '1577719286659006464',
807 'title': 'Ultima Reload - Test',
808 'description': 'Test https://t.co/Y3KEZD7Dad',
809 'channel_id': '168922496',
810 'uploader': 'Ultima Reload',
811 'uploader_id': 'UltimaShadowX',
812 'uploader_url': 'https://twitter.com/UltimaShadowX',
813 'upload_date': '20221005',
814 'timestamp': 1664992565,
815 'comment_count': int,
816 'repost_count': int,
817 'like_count': int,
818 'tags': [],
819 'age_limit': 0,
820 },
821 'playlist_count': 4,
822 'params': {'skip_download': True},
823 }, {
824 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
825 'info_dict': {
826 'id': '1575559336759263233',
827 'display_id': '1575560063510810624',
828 'ext': 'mp4',
829 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
830 'thumbnail': r're:^https?://.*\.jpg',
831 'description': 'md5:95aea692fda36a12081b9629b02daa92',
832 'channel_id': '1094109584',
833 'uploader': 'Max Olson',
834 'uploader_id': 'MesoMax919',
835 'uploader_url': 'https://twitter.com/MesoMax919',
836 'duration': 21.321,
837 'timestamp': 1664477766,
838 'upload_date': '20220929',
839 'comment_count': int,
840 'repost_count': int,
841 'like_count': int,
842 'tags': ['HurricaneIan'],
843 'age_limit': 0,
844 '_old_archive_ids': ['twitter 1575560063510810624'],
845 },
846 }, {
847 # Adult content, fails if not logged in
848 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
849 'info_dict': {
850 'id': '1575199163847000068',
851 'display_id': '1575199173472927762',
852 'ext': 'mp4',
853 'title': str,
854 'description': str,
855 'channel_id': '1217167793541480450',
856 'uploader': str,
857 'uploader_id': 'Rizdraws',
858 'uploader_url': 'https://twitter.com/Rizdraws',
859 'upload_date': '20220928',
860 'timestamp': 1664391723,
861 'thumbnail': r're:^https?://.+\.jpg',
862 'like_count': int,
863 'repost_count': int,
864 'comment_count': int,
865 'age_limit': 18,
866 'tags': [],
867 '_old_archive_ids': ['twitter 1575199173472927762'],
868 },
869 'params': {'skip_download': 'The media could not be played'},
870 'skip': 'Requires authentication',
871 }, {
872 # Playlist result only with graphql API
873 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
874 'playlist_mincount': 2,
875 'info_dict': {
876 'id': '1395079556562706435',
877 'title': str,
878 'tags': [],
879 'channel_id': '21539378',
880 'uploader': str,
881 'like_count': int,
882 'upload_date': '20210519',
883 'age_limit': 0,
884 'repost_count': int,
885 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
886 'uploader_id': 'Srirachachau',
887 'comment_count': int,
888 'uploader_url': 'https://twitter.com/Srirachachau',
889 'timestamp': 1621447860,
890 },
891 }, {
892 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
893 'playlist_mincount': 2,
894 'info_dict': {
895 'id': '1578353380363501568',
896 'title': str,
897 'channel_id': '2195866214',
898 'uploader_id': 'DavidToons_',
899 'repost_count': int,
900 'like_count': int,
901 'uploader': str,
902 'timestamp': 1665143744,
903 'uploader_url': 'https://twitter.com/DavidToons_',
904 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
905 'tags': [],
906 'comment_count': int,
907 'upload_date': '20221007',
908 'age_limit': 0,
909 },
910 }, {
911 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
912 'playlist_count': 2,
913 'info_dict': {
914 'id': '1578401165338976258',
915 'title': str,
916 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
917 'channel_id': '19338359',
918 'uploader': str,
919 'uploader_id': 'primevideouk',
920 'timestamp': 1665155137,
921 'upload_date': '20221007',
922 'age_limit': 0,
923 'uploader_url': 'https://twitter.com/primevideouk',
924 'comment_count': int,
925 'repost_count': int,
926 'like_count': int,
927 'tags': ['TheRingsOfPower'],
928 },
929 }, {
930 # Twitter Spaces
931 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
932 'info_dict': {
933 'id': '1lPJqmBeeNAJb',
934 'ext': 'm4a',
935 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
936 'uploader': r're:Monique Camarra.+?',
937 'uploader_id': 'MoniqueCamarra',
938 'live_status': 'was_live',
939 'release_timestamp': 1658417414,
940 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
941 'timestamp': 1658407771,
942 'release_date': '20220721',
943 'upload_date': '20220721',
944 },
945 'add_ie': ['TwitterSpaces'],
946 'params': {'skip_download': 'm3u8'},
947 'skip': 'Requires authentication',
948 }, {
949 # URL specifies video number but --yes-playlist
950 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
951 'playlist_mincount': 2,
952 'info_dict': {
953 'id': '1600649710662213632',
954 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
955 'timestamp': 1670459604.0,
956 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
957 'comment_count': int,
958 'uploader_id': 'CTVJLaidlaw',
959 'channel_id': '80082014',
960 'repost_count': int,
961 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
962 'upload_date': '20221208',
963 'age_limit': 0,
964 'uploader': 'Jocelyn Laidlaw',
965 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
966 'like_count': int,
967 },
968 }, {
969 # URL specifies video number and --no-playlist
970 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
971 'info_dict': {
972 'id': '1600649511827013632',
973 'ext': 'mp4',
974 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
975 'thumbnail': r're:^https?://.+\.jpg',
976 'timestamp': 1670459604.0,
977 'channel_id': '80082014',
978 'uploader_id': 'CTVJLaidlaw',
979 'uploader': 'Jocelyn Laidlaw',
980 'repost_count': int,
981 'comment_count': int,
982 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
983 'duration': 102.226,
984 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
985 'display_id': '1600649710662213632',
986 'like_count': int,
987 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
988 'upload_date': '20221208',
989 'age_limit': 0,
990 '_old_archive_ids': ['twitter 1600649710662213632'],
991 },
992 'params': {'noplaylist': True},
993 }, {
994 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
995 # note the id different between extraction and url
996 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
997 'info_dict': {
998 'id': '1621117577354424321',
999 'display_id': '1621117700482416640',
1000 'ext': 'mp4',
1001 'title': '뽀 - 아 최우제 이동속도 봐',
1002 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
1003 'duration': 24.598,
1004 'channel_id': '1281839411068432384',
1005 'uploader': '뽀',
1006 'uploader_id': 's2FAKER',
1007 'uploader_url': 'https://twitter.com/s2FAKER',
1008 'upload_date': '20230202',
1009 'timestamp': 1675339553.0,
1010 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1011 'age_limit': 18,
1012 'tags': [],
1013 'like_count': int,
1014 'repost_count': int,
1015 'comment_count': int,
1016 '_old_archive_ids': ['twitter 1621117700482416640'],
1017 },
1018 'skip': 'Requires authentication',
1019 }, {
1020 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1021 'info_dict': {
1022 'id': '1599108643743473680',
1023 'display_id': '1599108751385972737',
1024 'ext': 'mp4',
1025 'title': '\u06ea - \U0001F48B',
1026 'channel_id': '1347791436809441283',
1027 'uploader_url': 'https://twitter.com/hlo_again',
1028 'like_count': int,
1029 'uploader_id': 'hlo_again',
1030 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1031 'repost_count': int,
1032 'duration': 9.531,
1033 'comment_count': int,
1034 'upload_date': '20221203',
1035 'age_limit': 0,
1036 'timestamp': 1670092210.0,
1037 'tags': [],
1038 'uploader': '\u06ea',
1039 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1040 '_old_archive_ids': ['twitter 1599108751385972737'],
1041 },
1042 'params': {'noplaylist': True},
1043 }, {
1044 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1045 'info_dict': {
1046 'id': '1600009362759733248',
1047 'display_id': '1600009574919962625',
1048 'ext': 'mp4',
1049 'channel_id': '211814412',
1050 'uploader_url': 'https://twitter.com/MunTheShinobi',
1051 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1052 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1053 'age_limit': 0,
1054 'uploader': 'Mün',
1055 'repost_count': int,
1056 'upload_date': '20221206',
1057 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1058 'comment_count': int,
1059 'like_count': int,
1060 'tags': [],
1061 'uploader_id': 'MunTheShinobi',
1062 'duration': 139.987,
1063 'timestamp': 1670306984.0,
1064 '_old_archive_ids': ['twitter 1600009574919962625'],
1065 },
1066 }, {
1067 # retweeted_status (private)
1068 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1069 'info_dict': {
1070 'id': '1623274794488659969',
1071 'display_id': '1623739803874349067',
1072 'ext': 'mp4',
1073 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1074 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1075 'uploader': 'Johnny Bullets',
1076 'uploader_id': 'Johnnybull3ts',
1077 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1078 'age_limit': 0,
1079 'tags': [],
1080 'duration': 8.033,
1081 'timestamp': 1675853859.0,
1082 'upload_date': '20230208',
1083 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1084 'like_count': int,
1085 'repost_count': int,
1086 },
1087 'skip': 'Protected tweet',
1088 }, {
1089 # retweeted_status
1090 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1091 'info_dict': {
1092 'id': '1694928337846538240',
1093 'ext': 'mp4',
1094 'display_id': '1695424220702888009',
1095 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1096 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1097 'channel_id': '15212187',
1098 'uploader': 'Benny Johnson',
1099 'uploader_id': 'bennyjohnson',
1100 'uploader_url': 'https://twitter.com/bennyjohnson',
1101 'age_limit': 0,
1102 'tags': [],
1103 'duration': 45.001,
1104 'timestamp': 1692962814.0,
1105 'upload_date': '20230825',
1106 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1107 'like_count': int,
1108 'repost_count': int,
1109 'comment_count': int,
1110 '_old_archive_ids': ['twitter 1695424220702888009'],
1111 },
1112 }, {
1113 # retweeted_status w/ legacy API
1114 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1115 'info_dict': {
1116 'id': '1694928337846538240',
1117 'ext': 'mp4',
1118 'display_id': '1695424220702888009',
1119 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1120 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1121 'channel_id': '15212187',
1122 'uploader': 'Benny Johnson',
1123 'uploader_id': 'bennyjohnson',
1124 'uploader_url': 'https://twitter.com/bennyjohnson',
1125 'age_limit': 0,
1126 'tags': [],
1127 'duration': 45.001,
1128 'timestamp': 1692962814.0,
1129 'upload_date': '20230825',
1130 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1131 'like_count': int,
1132 'repost_count': int,
1133 '_old_archive_ids': ['twitter 1695424220702888009'],
1134 },
1135 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1136 }, {
1137 # Broadcast embedded in tweet
1138 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1139 'info_dict': {
1140 'id': '1rmxPMjLzAXKN',
1141 'ext': 'mp4',
1142 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1143 'uploader': 'Jessica Dobson',
1144 'uploader_id': 'JessicaDobsonWX',
1145 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1146 'timestamp': 1701566398,
1147 'upload_date': '20231203',
1148 'live_status': 'was_live',
1149 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1150 'concurrent_view_count': int,
1151 'view_count': int,
1152 },
1153 'add_ie': ['TwitterBroadcast'],
1154 }, {
1155 # Animated gif and quote tweet video
1156 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1157 'playlist_mincount': 2,
1158 'info_dict': {
1159 'id': '1696256659889565950',
1160 'title': 'BAKOON - https://t.co/zom968d0a0',
1161 'description': 'https://t.co/zom968d0a0',
1162 'tags': [],
1163 'channel_id': '1263540390',
1164 'uploader': 'BAKOON',
1165 'uploader_id': 'BAKKOOONN',
1166 'uploader_url': 'https://twitter.com/BAKKOOONN',
1167 'age_limit': 18,
1168 'timestamp': 1693254077.0,
1169 'upload_date': '20230828',
1170 'like_count': int,
1171 'comment_count': int,
1172 'repost_count': int,
1173 },
1174 'skip': 'Requires authentication',
1175 }, {
1176 # "stale tweet" with typename "TweetWithVisibilityResults"
1177 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1178 'md5': '511377ff8dfa7545307084dca4dce319',
1179 'info_dict': {
1180 'id': '1724883339285544960',
1181 'ext': 'mp4',
1182 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1183 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1184 'display_id': '1724884212803834154',
1185 'channel_id': '337808606',
1186 'uploader': 'Robert F. Kennedy Jr',
1187 'uploader_id': 'RobertKennedyJr',
1188 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1189 'upload_date': '20231115',
1190 'timestamp': 1700079417.0,
1191 'duration': 341.048,
1192 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1193 'tags': ['Kennedy24'],
1194 'repost_count': int,
1195 'like_count': int,
1196 'comment_count': int,
1197 'age_limit': 0,
1198 '_old_archive_ids': ['twitter 1724884212803834154'],
1199 },
1200 }, {
1201 # x.com
1202 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1203 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1204 'info_dict': {
1205 'id': '1790637589910654976',
1206 'ext': 'mp4',
1207 'title': 'Historic Vids - One of the most intense moments in history',
1208 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1209 'display_id': '1790637656616943991',
1210 'uploader': 'Historic Vids',
1211 'uploader_id': 'historyinmemes',
1212 'uploader_url': 'https://twitter.com/historyinmemes',
1213 'channel_id': '855481986290524160',
1214 'upload_date': '20240515',
1215 'timestamp': 1715756260.0,
1216 'duration': 15.488,
1217 'tags': [],
1218 'comment_count': int,
1219 'repost_count': int,
1220 'like_count': int,
1221 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1222 'age_limit': 0,
1223 '_old_archive_ids': ['twitter 1790637656616943991'],
1224 }
1225 }, {
1226 # onion route
1227 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1228 'only_matching': True,
1229 }, {
1230 # Twitch Clip Embed
1231 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1232 'only_matching': True,
1233 }, {
1234 # promo_video_website card
1235 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1236 'only_matching': True,
1237 }, {
1238 # promo_video_convo card
1239 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1240 'only_matching': True,
1241 }, {
1242 # appplayer card
1243 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1244 'only_matching': True,
1245 }, {
1246 # video_direct_message card
1247 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1248 'only_matching': True,
1249 }, {
1250 # poll2choice_video card
1251 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1252 'only_matching': True,
1253 }, {
1254 # poll3choice_video card
1255 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1256 'only_matching': True,
1257 }, {
1258 # poll4choice_video card
1259 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1260 'only_matching': True,
1261 }]
1262
# Captures the run of digits that sits between '_video/' and the next '/'.
# _call_syndication_api applies it to variant URLs to derive a media id.
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1264
@property
def _GRAPHQL_ENDPOINT(self):
    """Return the GraphQL endpoint string to query, chosen by login state.

    Logged-in sessions use the 'TweetDetail' endpoint; everyone else uses
    'TweetResultByRestId'.
    """
    return (
        'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' if self.is_logged_in
        else '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId')
1270
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-style status dict.

    data: the GraphQL payload to search for the tweet result.
    twid: the tweet id; used to pick the matching timeline entry when
        logged in and as context for warnings.

    Returns the tweet's 'legacy' dict, updated in place with the 'user',
    'card', 'quoted_status' and 'retweeted_status' sub-dicts that were found.
    Raises ExtractorError (or a login-required error) when the result is a
    tombstone or is reported as unavailable.
    """
    if self.is_logged_in:
        # Logged-in responses are a conversation timeline: pick the entry of the requested tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    if typename == 'TweetWithVisibilityResults':
        # Result for "stale tweet" needs additional transformation:
        # the actual tweet is nested one level deeper
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    nested = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested)

    # extra transformations needed since result does not match legacy format
    retweeted = status.get('retweeted_status')
    if retweeted:
        retweeted['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Collapse the list of {'key': ..., 'value': ...} bindings into a single mapping
    binding_values = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[binding.get('key')] = binding.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1318
def _build_graphql_query(self, media_id):
    """Build the GraphQL query dict for the tweet identified by media_id.

    Two fixed shapes exist: the guest one (keyed by 'tweetId', with an extra
    'fieldToggles' section) and the logged-in one (keyed by 'focalTweetId').
    Which one is returned depends solely on self.is_logged_in.
    """
    if not self.is_logged_in:
        guest_variables = {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        }
        guest_features = {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        }
        return {
            'variables': guest_variables,
            'features': guest_features,
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    member_variables = {
        'focalTweetId': media_id,
        'includePromotedContent': True,
        'with_rux_injections': False,
        'withBirdwatchNotes': True,
        'withCommunity': True,
        'withDownvotePerspective': False,
        'withQuickPromoteEligibilityTweetFields': True,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withSuperFollowsTweetFields': True,
        'withSuperFollowsUserFields': True,
        'withV2Timeline': True,
        'withVoice': True,
    }
    member_features = {
        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_graphql_timeline_navigation_enabled': False,
        'responsive_web_text_conversations_enabled': False,
        'responsive_web_uc_gql_enabled': True,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'tweetypie_unmention_optimization_enabled': True,
        'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
        'verified_phone_label_enabled': False,
        'vibe_api_enabled': True,
    }
    return {
        'variables': member_variables,
        'features': member_features,
    }
1383
def _call_syndication_api(self, twid):
    """Fetch tweet `twid` from the syndication endpoint in a legacy-like shape.

    Emits a one-time warning that this endpoint is incomplete, downloads the
    JSON, then adds an 'extended_entities' -> 'media' list built from the
    'mediaDetails' of the tweet and of its quoted tweet.

    Raises ExtractorError when the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    def with_media_id(detail):
        # Use the id found in a variant URL; fall back to the tweet id
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        return detail

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    status['extended_entities'] = {'media': [with_media_id(detail) for detail in media_details]}

    return status
1405
def _extract_status(self, twid):
    """Fetch the status dict for tweet `twid` through the selected API.

    GraphQL is used when logged in or when 'graphql' is selected, the legacy
    endpoint when 'legacy' is selected. An HTTP 429 from either falls back to
    the syndication endpoint. When 'syndication' is selected, its result is
    what gets returned.

    Returns the 'retweeted_status' dict if the status carries one, otherwise
    the status itself, or {} if neither is a dict.
    Raises ExtractorError for an invalid API selection.
    """
    api = self._selected_api
    if api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as e:
        rate_limited = isinstance(e.cause, HTTPError) and e.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    # NOTE(review): a logged-in session with 'syndication' selected performs the
    # GraphQL request above and then replaces its result here - confirm this is intended
    if api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1431
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet at `url`.

    The tweet id and the optional media number are read from the URL. Media
    is collected from the 'extended_entities' of the tweet and of its quoted
    tweet, plus the tweet's card if it has one.

    Returns:
        - a single video dict when a media number is honoured (no playlist)
          or when exactly one entry was found,
        - a playlist of entries (titles suffixed with ' #N') when several
          were found,
        - a url result for the tweet's first expanded URL when no media was
          found but such a URL exists and differs from `url`.

    Raises ExtractorError when the requested media number does not exist
    (including numbers below 1) or does not refer to a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry produced from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-media fields (id, formats, subtitles, thumbnails, ...) of one media dict."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card; yields nothing if there is no card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # A binding looks like {'type': 'STRING', 'string_value': ...}: read '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Every non-photo media item of the tweet and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # Media numbers in the URL are 1-based. A number below 1 would turn into a
        # negative index and pick media counted from the end of the list, so such
        # numbers are reported as unavailable instead of being looked up
        media_index = int(selected_index) - 1
        desired_obj = None
        if media_index >= 0:
            desired_obj = traverse_obj(status, (
                (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media found: fall back to the first URL the tweet links to, if any
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1615
1616
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for amp.twimg.com/v/<36-character id> pages; formats come from
    # the VMAP URL advertised in the page's 'twitter:amplify:vmap' meta tag
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page at `url` and build the info dict from its meta tags.

        Returns a dict with 'id', the fixed title 'Twitter Video', 'formats'
        and 'thumbnails'.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the 'twitter:<target>:width' / ':height' meta tags as ints (None when absent)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Only the first format receives the player dimensions. Guard against an
        # empty list so a page without usable formats does not crash with IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1672
1673
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for <twitter base URL>/i/broadcasts/<13-character id>
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the broadcast named in `url` and return its info dict.

        Upcoming broadcasts are returned without formats. For all others the
        stream status is queried and m3u8 formats are attached.
        Raises ExtractorError when the API has no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast take precedence over the parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The format id is the playlist URL's 'type' query parameter, if present
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = url_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1749
1750
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Twitter Spaces URLs (``/i/spaces/<13-character id>``)."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased Space ``state`` value -> ``live_status`` reported in the result
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL payload (``variables`` and ``features``) for *space_id*."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Extract metadata and audio formats for a Space; requires a logged-in session."""
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space_data = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Missing or unknown states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(
                f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among noRedirectPlaybackUrl and location
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}),
                get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)
            for fmt in formats:
                # Every format is tagged as audio-only AAC
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # Both timestamps are divided by 1000 (scale=1000)
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1889
1890
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<id>`` pseudo-URLs) to their target URL."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'
    # Prefix of the warning-page URL; the actual target follows it directly
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Follow the short link and hand the final URL over via ``url_result``.

        When the final URL is the unsafe-link warning page, the warning prefix
        is stripped so that the wrapped target URL is returned instead.
        """
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # ``tco:<id>`` form: rebuild the real https://t.co/ URL to request
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the curl User-Agent is presumably there so that t.co
        # answers with a plain HTTP redirect - confirm
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Drop only the leading prefix; str.replace would also remove any
            # later occurrence inside the wrapped target URL
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)