]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[ie/motherless] Support uploader playlists (#8994)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import random
3 import re
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import functools # isort: split
8 from ..compat import (
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..networking.exceptions import HTTPError
14 from ..utils import (
15 ExtractorError,
16 dict_get,
17 filter_dict,
18 float_or_none,
19 format_field,
20 int_or_none,
21 make_archive_id,
22 remove_end,
23 str_or_none,
24 strip_or_none,
25 traverse_obj,
26 try_call,
27 try_get,
28 unified_timestamp,
29 update_url_query,
30 url_or_none,
31 xpath_text,
32 )
33
34
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors.

    Provides format extraction from media variants and VMAP documents,
    the username/password login flow, and wrappers around the legacy
    (1.1) and GraphQL API endpoints.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens; _set_base_headers() picks _LEGACY_AUTH only for legacy requests made while logged out
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api() after every successful login step
    _flow_token = None

    # Request body (compact JSON, encoded to bytes) sent with the first login API call
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Return a ``(formats, subtitles)`` pair for one media variant mapping.

        A variant without a URL yields ``([], {})``. A URL containing
        ``.m3u8`` is delegated to the HLS helper (non-fatal); any other URL
        becomes a single direct format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        if '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)

        # `or None` collapses a zero bitrate so that no "-0" suffix is generated below
        tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
        fmt = {
            'url': variant_url,
            'format_id': 'http' + ('-%d' % tbr if tbr else ''),
            'tbr': tbr,
        }
        self._search_dimensions_in_video_url(fmt, variant_url)
        return [fmt], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and return ``(formats, subtitles)``.

        Every ``videoVariant`` element is processed, followed by the
        ``MediaFile`` URL when it is present and not already covered by one
        of the variants. An invalid ``vmap_url`` yields ``([], {})``.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}

        vmap_data = self._download_xml(vmap_url, video_id)
        formats, subtitles, seen_urls = [], {}, []

        def collect(variant):
            nonlocal subtitles
            fmts, subs = self._extract_variant_formats(variant, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is stored percent-encoded; decode it in place before use
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            seen_urls.append(video_variant.attrib['url'])
            collect(video_variant.attrib)

        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # A missing/empty MediaFile contributes nothing, so skip it explicitly
        if video_url and video_url not in seen_urls:
            collect({'url': video_url})

        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` if ``video_url`` has a ``/<W>x<H>/`` path segment."""
        mobj = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if not mobj:
            return
        a_format.update({
            'width': int(mobj.group('width')),
            'height': int(mobj.group('height')),
        })

    @property
    def is_logged_in(self):
        """Whether an ``auth_token`` cookie is present for the API base URL."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        """First value of the ``api`` extractor argument (``ie_key='Twitter'``); ``'graphql'`` is the fallback."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from ``guest/activate.json``.

        Raises ExtractorError if the response carries no string ``guest_token``.
        """
        # Legacy headers are only requested when a display_id is given and the legacy API is selected
        response = self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy'))
        guest_token = traverse_obj(response, ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Build the ``Authorization`` / ``x-csrf-token`` headers shared by all API requests.

        The legacy bearer token is used only when ``legacy`` is truthy and no
        login cookie is present. Entries whose value is ``None`` (e.g. when
        the ``ct0`` cookie is missing) are dropped by ``filter_dict``.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow and return the next subtask ID.

        Stores the response's ``flow_token`` on the instance for the next
        step. Raises ExtractorError when the API reports an error, the
        status is not ``success``, or no subtask is returned.
        """
        # None instead of a shared mutable default; normalise for the downloader
        query = {} if query is None else query
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query, data=data, expected_status=400)

        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        if traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in with username and password by walking the onboarding subtasks.

        Does nothing when already logged in. Each iteration answers the
        subtask returned by the previous API call until the auth cookie
        appears; captcha, rejection, a success subtask without a cookie, or
        an unknown subtask abort the flow with an error.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page; otherwise request a fresh one
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token from the latest response
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Reaching this branch means the loop condition still saw no auth cookie
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or GraphQL API at ``path`` and return the parsed JSON.

        Session headers are sent when logged in; otherwise a guest token is
        fetched for the request. Raises a login-required error when the API
        reports "not authorized", and ExtractorError for any other reported
        error.
        """
        # None instead of a shared mutable default; normalise for the downloader
        query = {} if query is None else query

        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })
        else:
            headers.update({
                'x-guest-token': self._fetch_guest_token(video_id)
            })

        # These HTTP statuses still carry a JSON body that is inspected below
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys de-duplicates while keeping the API's order, so the message is deterministic
            errors = ', '.join(dict.fromkeys(
                traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the mapping of GraphQL query parameters for ``media_id``; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL ``endpoint`` and return the ``data`` member of the response.

        Each value produced by ``_build_graphql_query`` is serialised to
        compact JSON before being sent as a query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
333
334
class TwitterCardIE(InfoExtractor):
    """Resolve Twitter card / embedded-video URLs by delegating to the status extractor."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Hand the matched ID over to TwitterIE as a ``/statuses/<id>`` URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
459
460
461 class TwitterIE(TwitterBaseIE):
462 IE_NAME = 'twitter'
463 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
464
465 _TESTS = [{
466 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
467 'info_dict': {
468 'id': '643211870443208704',
469 'display_id': '643211948184596480',
470 'ext': 'mp4',
471 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
472 'thumbnail': r're:^https?://.*\.jpg',
473 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
474 'uploader': 'FREE THE NIPPLE',
475 'uploader_id': 'freethenipple',
476 'duration': 12.922,
477 'timestamp': 1442188653,
478 'upload_date': '20150913',
479 'uploader_url': 'https://twitter.com/freethenipple',
480 'comment_count': int,
481 'repost_count': int,
482 'like_count': int,
483 'tags': [],
484 'age_limit': 18,
485 '_old_archive_ids': ['twitter 643211948184596480'],
486 },
487 }, {
488 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
489 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
490 'info_dict': {
491 'id': '657991469417025536',
492 'ext': 'mp4',
493 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
494 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
495 'thumbnail': r're:^https?://.*\.png',
496 'uploader': 'Gifs',
497 'uploader_id': 'giphz',
498 },
499 'expected_warnings': ['height', 'width'],
500 'skip': 'Account suspended',
501 }, {
502 'url': 'https://twitter.com/starwars/status/665052190608723968',
503 'info_dict': {
504 'id': '665052190608723968',
505 'display_id': '665052190608723968',
506 'ext': 'mp4',
507 'title': r're:Star Wars.*A new beginning is coming December 18.*',
508 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
509 'uploader_id': 'starwars',
510 'uploader': r're:Star Wars.*',
511 'timestamp': 1447395772,
512 'upload_date': '20151113',
513 'uploader_url': 'https://twitter.com/starwars',
514 'comment_count': int,
515 'repost_count': int,
516 'like_count': int,
517 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
518 'age_limit': 0,
519 '_old_archive_ids': ['twitter 665052190608723968'],
520 },
521 }, {
522 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
523 'info_dict': {
524 'id': '705235433198714880',
525 'ext': 'mp4',
526 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
527 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
528 'uploader_id': 'BTNBrentYarina',
529 'uploader': 'Brent Yarina',
530 'timestamp': 1456976204,
531 'upload_date': '20160303',
532 'uploader_url': 'https://twitter.com/BTNBrentYarina',
533 'comment_count': int,
534 'repost_count': int,
535 'like_count': int,
536 'tags': [],
537 'age_limit': 0,
538 },
539 'params': {
540 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
541 # Test case of TwitterCardIE
542 'skip_download': True,
543 },
544 'skip': 'Dead external link',
545 }, {
546 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
547 'info_dict': {
548 'id': '700207414000242688',
549 'display_id': '700207533655363584',
550 'ext': 'mp4',
551 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
552 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
553 'thumbnail': r're:^https?://.*\.jpg',
554 'uploader': 'jaydin donte geer',
555 'uploader_id': 'jaydingeer',
556 'duration': 30.0,
557 'timestamp': 1455777459,
558 'upload_date': '20160218',
559 'uploader_url': 'https://twitter.com/jaydingeer',
560 'comment_count': int,
561 'repost_count': int,
562 'like_count': int,
563 'tags': ['Damndaniel'],
564 'age_limit': 0,
565 '_old_archive_ids': ['twitter 700207533655363584'],
566 },
567 }, {
568 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
569 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
570 'info_dict': {
571 'id': 'MIOxnrUteUd',
572 'ext': 'mp4',
573 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
574 'uploader': 'TAKUMA',
575 'uploader_id': '1004126642786242560',
576 'timestamp': 1402826626,
577 'upload_date': '20140615',
578 'thumbnail': r're:^https?://.*\.jpg',
579 'alt_title': 'Vine by TAKUMA',
580 'comment_count': int,
581 'repost_count': int,
582 'like_count': int,
583 'view_count': int,
584 },
585 'add_ie': ['Vine'],
586 }, {
587 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
588 'info_dict': {
589 'id': '717462543795523584',
590 'display_id': '719944021058060289',
591 'ext': 'mp4',
592 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
593 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
594 'uploader_id': 'CaptainAmerica',
595 'uploader': 'Captain America',
596 'duration': 3.17,
597 'timestamp': 1460483005,
598 'upload_date': '20160412',
599 'uploader_url': 'https://twitter.com/CaptainAmerica',
600 'thumbnail': r're:^https?://.*\.jpg',
601 'comment_count': int,
602 'repost_count': int,
603 'like_count': int,
604 'tags': [],
605 'age_limit': 0,
606 '_old_archive_ids': ['twitter 719944021058060289'],
607 },
608 }, {
609 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
610 'info_dict': {
611 'id': '1zqKVVlkqLaKB',
612 'ext': 'mp4',
613 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
614 'upload_date': '20160923',
615 'uploader_id': '1PmKqpJdOJQoY',
616 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
617 'timestamp': 1474613214,
618 'thumbnail': r're:^https?://.*\.jpg',
619 },
620 'add_ie': ['Periscope'],
621 'skip': 'Broadcast not found',
622 }, {
623 # has mp4 formats via mobile API
624 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
625 'info_dict': {
626 'id': '852077943283097602',
627 'ext': 'mp4',
628 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
629 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
630 'uploader': 'عالم الأخبار',
631 'uploader_id': 'news_al3alm',
632 'duration': 277.4,
633 'timestamp': 1492000653,
634 'upload_date': '20170412',
635 'display_id': '852138619213144067',
636 'age_limit': 0,
637 'uploader_url': 'https://twitter.com/news_al3alm',
638 'thumbnail': r're:^https?://.*\.jpg',
639 'tags': [],
640 'repost_count': int,
641 'like_count': int,
642 'comment_count': int,
643 '_old_archive_ids': ['twitter 852138619213144067'],
644 },
645 }, {
646 'url': 'https://twitter.com/i/web/status/910031516746514432',
647 'info_dict': {
648 'id': '910030238373089285',
649 'display_id': '910031516746514432',
650 'ext': 'mp4',
651 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
652 'thumbnail': r're:^https?://.*\.jpg',
653 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
654 'uploader': 'Préfet de Guadeloupe',
655 'uploader_id': 'Prefet971',
656 'duration': 47.48,
657 'timestamp': 1505803395,
658 'upload_date': '20170919',
659 'uploader_url': 'https://twitter.com/Prefet971',
660 'comment_count': int,
661 'repost_count': int,
662 'like_count': int,
663 'tags': ['Maria'],
664 'age_limit': 0,
665 '_old_archive_ids': ['twitter 910031516746514432'],
666 },
667 'params': {
668 'skip_download': True, # requires ffmpeg
669 },
670 }, {
671 # card via api.twitter.com/1.1/videos/tweet/config
672 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
673 'info_dict': {
674 'id': '1001551417340022785',
675 'display_id': '1001551623938805763',
676 'ext': 'mp4',
677 'title': 're:.*?Shep is on a roll today.*?',
678 'thumbnail': r're:^https?://.*\.jpg',
679 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
680 'uploader': 'Lis Power',
681 'uploader_id': 'LisPower1',
682 'duration': 111.278,
683 'timestamp': 1527623489,
684 'upload_date': '20180529',
685 'uploader_url': 'https://twitter.com/LisPower1',
686 'comment_count': int,
687 'repost_count': int,
688 'like_count': int,
689 'tags': [],
690 'age_limit': 0,
691 '_old_archive_ids': ['twitter 1001551623938805763'],
692 },
693 'params': {
694 'skip_download': True, # requires ffmpeg
695 },
696 }, {
697 'url': 'https://twitter.com/foobar/status/1087791357756956680',
698 'info_dict': {
699 'id': '1087791272830607360',
700 'display_id': '1087791357756956680',
701 'ext': 'mp4',
702 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
703 'thumbnail': r're:^https?://.*\.jpg',
704 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
705 'uploader': 'X',
706 'uploader_id': 'X',
707 'duration': 61.567,
708 'timestamp': 1548184644,
709 'upload_date': '20190122',
710 'uploader_url': 'https://twitter.com/X',
711 'comment_count': int,
712 'repost_count': int,
713 'like_count': int,
714 'view_count': int,
715 'tags': [],
716 'age_limit': 0,
717 },
718 'skip': 'This Tweet is unavailable',
719 }, {
720 # not available in Periscope
721 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
722 'info_dict': {
723 'id': '1vOGwqejwoWxB',
724 'ext': 'mp4',
725 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
726 'uploader': 'Vivi',
727 'uploader_id': '1eVjYOLGkGrQL',
728 'thumbnail': r're:^https?://.*\.jpg',
729 'tags': ['EduTECH2019'],
730 'view_count': int,
731 },
732 'add_ie': ['TwitterBroadcast'],
733 'skip': 'Broadcast no longer exists',
734 }, {
735 # unified card
736 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
737 'info_dict': {
738 'id': '1349774757969989634',
739 'display_id': '1349794411333394432',
740 'ext': 'mp4',
741 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
742 'thumbnail': r're:^https?://.*\.jpg',
743 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
744 'uploader': 'Brooklyn Nets',
745 'uploader_id': 'BrooklynNets',
746 'duration': 324.484,
747 'timestamp': 1610651040,
748 'upload_date': '20210114',
749 'uploader_url': 'https://twitter.com/BrooklynNets',
750 'comment_count': int,
751 'repost_count': int,
752 'like_count': int,
753 'tags': [],
754 'age_limit': 0,
755 '_old_archive_ids': ['twitter 1349794411333394432'],
756 },
757 'params': {
758 'skip_download': True,
759 },
760 }, {
761 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
762 'info_dict': {
763 'id': '1577855447914409984',
764 'display_id': '1577855540407197696',
765 'ext': 'mp4',
766 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
767 'description': 'md5:b9c3699335447391d11753ab21c70a74',
768 'upload_date': '20221006',
769 'uploader': 'oshtru',
770 'uploader_id': 'oshtru',
771 'uploader_url': 'https://twitter.com/oshtru',
772 'thumbnail': r're:^https?://.*\.jpg',
773 'duration': 30.03,
774 'timestamp': 1665025050,
775 'comment_count': int,
776 'repost_count': int,
777 'like_count': int,
778 'tags': [],
779 'age_limit': 0,
780 '_old_archive_ids': ['twitter 1577855540407197696'],
781 },
782 'params': {'skip_download': True},
783 }, {
784 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
785 'info_dict': {
786 'id': '1577719286659006464',
787 'title': 'Ultima - Test',
788 'description': 'Test https://t.co/Y3KEZD7Dad',
789 'uploader': 'Ultima',
790 'uploader_id': 'UltimaShadowX',
791 'uploader_url': 'https://twitter.com/UltimaShadowX',
792 'upload_date': '20221005',
793 'timestamp': 1664992565,
794 'comment_count': int,
795 'repost_count': int,
796 'like_count': int,
797 'tags': [],
798 'age_limit': 0,
799 },
800 'playlist_count': 4,
801 'params': {'skip_download': True},
802 }, {
803 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
804 'info_dict': {
805 'id': '1575559336759263233',
806 'display_id': '1575560063510810624',
807 'ext': 'mp4',
808 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
809 'thumbnail': r're:^https?://.*\.jpg',
810 'description': 'md5:95aea692fda36a12081b9629b02daa92',
811 'uploader': 'Max Olson',
812 'uploader_id': 'MesoMax919',
813 'uploader_url': 'https://twitter.com/MesoMax919',
814 'duration': 21.321,
815 'timestamp': 1664477766,
816 'upload_date': '20220929',
817 'comment_count': int,
818 'repost_count': int,
819 'like_count': int,
820 'tags': ['HurricaneIan'],
821 'age_limit': 0,
822 '_old_archive_ids': ['twitter 1575560063510810624'],
823 },
824 }, {
825 # Adult content, fails if not logged in
826 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
827 'info_dict': {
828 'id': '1575199163847000068',
829 'display_id': '1575199173472927762',
830 'ext': 'mp4',
831 'title': str,
832 'description': str,
833 'uploader': str,
834 'uploader_id': 'Rizdraws',
835 'uploader_url': 'https://twitter.com/Rizdraws',
836 'upload_date': '20220928',
837 'timestamp': 1664391723,
838 'thumbnail': r're:^https?://.+\.jpg',
839 'like_count': int,
840 'repost_count': int,
841 'comment_count': int,
842 'age_limit': 18,
843 'tags': []
844 },
845 'params': {'skip_download': 'The media could not be played'},
846 'skip': 'Requires authentication',
847 }, {
848 # Playlist result only with graphql API
849 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
850 'playlist_mincount': 2,
851 'info_dict': {
852 'id': '1395079556562706435',
853 'title': str,
854 'tags': [],
855 'uploader': str,
856 'like_count': int,
857 'upload_date': '20210519',
858 'age_limit': 0,
859 'repost_count': int,
860 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
861 'uploader_id': 'Srirachachau',
862 'comment_count': int,
863 'uploader_url': 'https://twitter.com/Srirachachau',
864 'timestamp': 1621447860,
865 },
866 }, {
867 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
868 'playlist_mincount': 2,
869 'info_dict': {
870 'id': '1578353380363501568',
871 'title': str,
872 'uploader_id': 'DavidToons_',
873 'repost_count': int,
874 'like_count': int,
875 'uploader': str,
876 'timestamp': 1665143744,
877 'uploader_url': 'https://twitter.com/DavidToons_',
878 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
879 'tags': [],
880 'comment_count': int,
881 'upload_date': '20221007',
882 'age_limit': 0,
883 },
884 }, {
885 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
886 'playlist_count': 2,
887 'info_dict': {
888 'id': '1578401165338976258',
889 'title': str,
890 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
891 'uploader': str,
892 'uploader_id': 'primevideouk',
893 'timestamp': 1665155137,
894 'upload_date': '20221007',
895 'age_limit': 0,
896 'uploader_url': 'https://twitter.com/primevideouk',
897 'comment_count': int,
898 'repost_count': int,
899 'like_count': int,
900 'tags': ['TheRingsOfPower'],
901 },
902 }, {
903 # Twitter Spaces
904 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
905 'info_dict': {
906 'id': '1lPJqmBeeNAJb',
907 'ext': 'm4a',
908 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
909 'uploader': r're:Monique Camarra.+?',
910 'uploader_id': 'MoniqueCamarra',
911 'live_status': 'was_live',
912 'release_timestamp': 1658417414,
913 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
914 'timestamp': 1658407771,
915 'release_date': '20220721',
916 'upload_date': '20220721',
917 },
918 'add_ie': ['TwitterSpaces'],
919 'params': {'skip_download': 'm3u8'},
920 'skip': 'Requires authentication',
921 }, {
922 # URL specifies video number but --yes-playlist
923 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
924 'playlist_mincount': 2,
925 'info_dict': {
926 'id': '1600649710662213632',
927 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
928 'timestamp': 1670459604.0,
929 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
930 'comment_count': int,
931 'uploader_id': 'CTVJLaidlaw',
932 'repost_count': int,
933 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
934 'upload_date': '20221208',
935 'age_limit': 0,
936 'uploader': 'Jocelyn Laidlaw',
937 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
938 'like_count': int,
939 },
940 }, {
941 # URL specifies video number and --no-playlist
942 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
943 'info_dict': {
944 'id': '1600649511827013632',
945 'ext': 'mp4',
946 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
947 'thumbnail': r're:^https?://.+\.jpg',
948 'timestamp': 1670459604.0,
949 'uploader_id': 'CTVJLaidlaw',
950 'uploader': 'Jocelyn Laidlaw',
951 'repost_count': int,
952 'comment_count': int,
953 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
954 'duration': 102.226,
955 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
956 'display_id': '1600649710662213632',
957 'like_count': int,
958 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
959 'upload_date': '20221208',
960 'age_limit': 0,
961 '_old_archive_ids': ['twitter 1600649710662213632'],
962 },
963 'params': {'noplaylist': True},
964 }, {
965 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
966 # note the id different between extraction and url
967 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
968 'info_dict': {
969 'id': '1621117577354424321',
970 'display_id': '1621117700482416640',
971 'ext': 'mp4',
972 'title': '뽀 - 아 최우제 이동속도 봐',
973 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
974 'duration': 24.598,
975 'uploader': '뽀',
976 'uploader_id': 's2FAKER',
977 'uploader_url': 'https://twitter.com/s2FAKER',
978 'upload_date': '20230202',
979 'timestamp': 1675339553.0,
980 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
981 'age_limit': 18,
982 'tags': [],
983 'like_count': int,
984 'repost_count': int,
985 'comment_count': int,
986 '_old_archive_ids': ['twitter 1621117700482416640'],
987 },
988 }, {
989 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
990 'info_dict': {
991 'id': '1599108643743473680',
992 'display_id': '1599108751385972737',
993 'ext': 'mp4',
994 'title': '\u06ea - \U0001F48B',
995 'uploader_url': 'https://twitter.com/hlo_again',
996 'like_count': int,
997 'uploader_id': 'hlo_again',
998 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
999 'repost_count': int,
1000 'duration': 9.531,
1001 'comment_count': int,
1002 'upload_date': '20221203',
1003 'age_limit': 0,
1004 'timestamp': 1670092210.0,
1005 'tags': [],
1006 'uploader': '\u06ea',
1007 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1008 '_old_archive_ids': ['twitter 1599108751385972737'],
1009 },
1010 'params': {'noplaylist': True},
1011 }, {
1012 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1013 'info_dict': {
1014 'id': '1600009362759733248',
1015 'display_id': '1600009574919962625',
1016 'ext': 'mp4',
1017 'uploader_url': 'https://twitter.com/MunTheShinobi',
1018 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1019 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1020 'age_limit': 0,
1021 'uploader': 'Mün',
1022 'repost_count': int,
1023 'upload_date': '20221206',
1024 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1025 'comment_count': int,
1026 'like_count': int,
1027 'tags': [],
1028 'uploader_id': 'MunTheShinobi',
1029 'duration': 139.987,
1030 'timestamp': 1670306984.0,
1031 '_old_archive_ids': ['twitter 1600009574919962625'],
1032 },
1033 }, {
1034 # retweeted_status (private)
1035 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1036 'info_dict': {
1037 'id': '1623274794488659969',
1038 'display_id': '1623739803874349067',
1039 'ext': 'mp4',
1040 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1041 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1042 'uploader': 'Johnny Bullets',
1043 'uploader_id': 'Johnnybull3ts',
1044 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1045 'age_limit': 0,
1046 'tags': [],
1047 'duration': 8.033,
1048 'timestamp': 1675853859.0,
1049 'upload_date': '20230208',
1050 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1051 'like_count': int,
1052 'repost_count': int,
1053 },
1054 'skip': 'Protected tweet',
1055 }, {
1056 # retweeted_status
1057 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1058 'info_dict': {
1059 'id': '1694928337846538240',
1060 'ext': 'mp4',
1061 'display_id': '1695424220702888009',
1062 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1063 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1064 'uploader': 'Benny Johnson',
1065 'uploader_id': 'bennyjohnson',
1066 'uploader_url': 'https://twitter.com/bennyjohnson',
1067 'age_limit': 0,
1068 'tags': [],
1069 'duration': 45.001,
1070 'timestamp': 1692962814.0,
1071 'upload_date': '20230825',
1072 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1073 'like_count': int,
1074 'repost_count': int,
1075 'comment_count': int,
1076 '_old_archive_ids': ['twitter 1695424220702888009'],
1077 },
1078 }, {
1079 # retweeted_status w/ legacy API
1080 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1081 'info_dict': {
1082 'id': '1694928337846538240',
1083 'ext': 'mp4',
1084 'display_id': '1695424220702888009',
1085 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1086 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1087 'uploader': 'Benny Johnson',
1088 'uploader_id': 'bennyjohnson',
1089 'uploader_url': 'https://twitter.com/bennyjohnson',
1090 'age_limit': 0,
1091 'tags': [],
1092 'duration': 45.001,
1093 'timestamp': 1692962814.0,
1094 'upload_date': '20230825',
1095 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1096 'like_count': int,
1097 'repost_count': int,
1098 '_old_archive_ids': ['twitter 1695424220702888009'],
1099 },
1100 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1101 }, {
1102 # Broadcast embedded in tweet
1103 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1104 'info_dict': {
1105 'id': '1rmxPMjLzAXKN',
1106 'ext': 'mp4',
1107 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1108 'uploader': 'Jessica Dobson',
1109 'uploader_id': 'JessicaDobsonWX',
1110 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1111 'timestamp': 1701566398,
1112 'upload_date': '20231203',
1113 'live_status': 'was_live',
1114 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1115 'concurrent_view_count': int,
1116 'view_count': int,
1117 },
1118 'add_ie': ['TwitterBroadcast'],
1119 }, {
1120 # Animated gif and quote tweet video, with syndication API
1121 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1122 'playlist_mincount': 2,
1123 'info_dict': {
1124 'id': '1696256659889565950',
1125 'title': 'BAKOON - https://t.co/zom968d0a0',
1126 'description': 'https://t.co/zom968d0a0',
1127 'tags': [],
1128 'uploader': 'BAKOON',
1129 'uploader_id': 'BAKKOOONN',
1130 'uploader_url': 'https://twitter.com/BAKKOOONN',
1131 'age_limit': 18,
1132 'timestamp': 1693254077.0,
1133 'upload_date': '20230828',
1134 'like_count': int,
1135 },
1136 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
1137 'expected_warnings': ['Not all metadata'],
1138 }, {
1139 # "stale tweet" with typename "TweetWithVisibilityResults"
1140 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1141 'md5': '62b1e11cdc2cdd0e527f83adb081f536',
1142 'info_dict': {
1143 'id': '1724883339285544960',
1144 'ext': 'mp4',
1145 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1146 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1147 'display_id': '1724884212803834154',
1148 'uploader': 'Robert F. Kennedy Jr',
1149 'uploader_id': 'RobertKennedyJr',
1150 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1151 'upload_date': '20231115',
1152 'timestamp': 1700079417.0,
1153 'duration': 341.048,
1154 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1155 'tags': ['Kennedy24'],
1156 'repost_count': int,
1157 'like_count': int,
1158 'comment_count': int,
1159 'age_limit': 0,
1160 '_old_archive_ids': ['twitter 1724884212803834154'],
1161 },
1162 }, {
1163 # onion route
1164 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1165 'only_matching': True,
1166 }, {
1167 # Twitch Clip Embed
1168 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1169 'only_matching': True,
1170 }, {
1171 # promo_video_website card
1172 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1173 'only_matching': True,
1174 }, {
1175 # promo_video_convo card
1176 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1177 'only_matching': True,
1178 }, {
1179 # appplayer card
1180 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1181 'only_matching': True,
1182 }, {
1183 # video_direct_message card
1184 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1185 'only_matching': True,
1186 }, {
1187 # poll2choice_video card
1188 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1189 'only_matching': True,
1190 }, {
1191 # poll3choice_video card
1192 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1193 'only_matching': True,
1194 }, {
1195 # poll4choice_video card
1196 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1197 'only_matching': True,
1198 }]
1199
# Captures the numeric id found in the "_video/<digits>/" segment of a media variant URL;
# _call_syndication_api uses capture group 1 as the media entry's `id_str`
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1201
@property
def _GRAPHQL_ENDPOINT(self):
    """Endpoint path passed to the GraphQL API call when looking up a tweet.

    The `TweetDetail` path is returned when `self.is_logged_in` is truthy,
    the `TweetResultByRestId` path otherwise.
    """
    return (
        'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' if self.is_logged_in
        else '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId')
1207
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-style status dict.

    @param data  GraphQL response payload to search for the tweet result
    @param twid  Tweet ID, used to find the matching timeline entry and in warnings
    @returns     The tweet's `legacy` dict, augmented in place with `user`, `card`,
                 `quoted_status` and `retweeted_status` where present
    @raises ExtractorError  if the result carries a tombstone or is a `TweetUnavailable`
    """
    # The response layout differs between the logged-in and the guest endpoint
    if self.is_logged_in:
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    typename = result.get('__typename')
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # A "stale tweet" result wraps the actual tweet one level deeper
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    extra_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # The GraphQL result does not match the legacy format, so the retweeted
    # tweet's user has to be attached separately
    retweeted = status.get('retweeted_status')
    if retweeted:
        retweeted['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Turn the card's list of {key, value} dicts into a single key -> value mapping
    binding_values = {}
    for entry in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[entry.get('key')] = entry.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1255
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for a tweet lookup.

    @param media_id  Tweet ID; sent as `focalTweetId` when logged in, as `tweetId` otherwise
    @returns         Dict with `variables` and `features`; the logged-out
                     payload additionally carries `fieldToggles`
    """
    if not self.is_logged_in:
        return {
            'variables': {
                'tweetId': media_id,
                'withCommunity': False,
                'includePromotedContent': False,
                'withVoice': False,
            },
            'features': {
                'creator_subscriptions_tweet_preview_api_enabled': True,
                'tweetypie_unmention_optimization_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
                'view_counts_everywhere_api_enabled': True,
                'longform_notetweets_consumption_enabled': True,
                'responsive_web_twitter_article_tweet_consumption_enabled': False,
                'tweet_awards_web_tipping_enabled': False,
                'freedom_of_speech_not_reach_fetch_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
                'longform_notetweets_rich_text_read_enabled': True,
                'longform_notetweets_inline_media_enabled': True,
                'responsive_web_graphql_exclude_directive_enabled': True,
                'verified_phone_label_enabled': False,
                'responsive_web_media_download_video_enabled': False,
                'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
                'responsive_web_graphql_timeline_navigation_enabled': True,
                'responsive_web_enhance_cards_enabled': False,
            },
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    variables = {
        'focalTweetId': media_id,
        'includePromotedContent': True,
        'with_rux_injections': False,
        'withBirdwatchNotes': True,
        'withCommunity': True,
        'withDownvotePerspective': False,
        'withQuickPromoteEligibilityTweetFields': True,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withSuperFollowsTweetFields': True,
        'withSuperFollowsUserFields': True,
        'withV2Timeline': True,
        'withVoice': True,
    }
    features = {
        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_graphql_timeline_navigation_enabled': False,
        'responsive_web_text_conversations_enabled': False,
        'responsive_web_uc_gql_enabled': True,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'tweetypie_unmention_optimization_enabled': True,
        'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
        'verified_phone_label_enabled': False,
        'vibe_api_enabled': True,
    }
    return {'variables': variables, 'features': features}
1320
def _call_syndication_api(self, twid):
    """Fetch a tweet from the syndication endpoint and reshape it like a legacy/graphql status.

    @param twid  Tweet ID
    @returns     The downloaded status dict with an added `extended_entities`
                 entry listing the media of the tweet and of its quoted tweet
    @raises ExtractorError  if the endpoint returns an empty JSON response
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    query = {
        'id': twid,
        'token': token,
    }
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query=query)
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    def with_media_id(detail):
        # Take the media id from the first variant URL that matches, else fall back to the tweet id
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        return detail

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    status['extended_entities'] = {'media': [with_media_id(detail) for detail in media_details]}

    return status
1342
def _extract_status(self, twid):
    """Fetch the status dict for a tweet through the selected API.

    GraphQL is used when logged in or when selected; the legacy API when
    selected; the syndication endpoint when selected or as a fallback after
    an HTTP 429 response.

    @param twid  Tweet ID
    @returns     The status's `retweeted_status` dict if there is one,
                 otherwise the status itself (or {} if neither is a dict)
    @raises ExtractorError  on an invalid API selection, or when the API call
                            fails for a reason other than HTTP 429
    """
    valid_apis = ('graphql', 'legacy', 'syndication')
    if self._selected_api not in valid_apis:
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or self._selected_api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif self._selected_api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as e:
        # Only a rate-limit response is recoverable; anything else propagates
        if not (isinstance(e.cause, HTTPError) and e.cause.status == 429):
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1368
def _real_extract(self, url):
    """Extract the media attached to a tweet.

    Looks up the tweet status, builds the metadata shared by all entries, and
    then returns one of:
      * a single video info dict, when a video number was given in the URL and
        the playlist is not wanted,
      * a single entry, when exactly one video/card entry was found,
      * a playlist of all entries, when several were found,
      * a URL result for the tweet's first expanded URL, when no entry was found.

    @param url  Tweet URL matched by this extractor
    @raises ExtractorError  if the requested media number does not exist or is not a video
    """
    # 'index' is presumably the optional video number from the URL (e.g. ".../video/2") - see _VALID_URL
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    # Use the first available of full_text/text, with newlines flattened to spaces
    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-video fields (id, formats, subtitles, thumbnails, ...) from a media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        # Collect formats and subtitles from every variant of this media
        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                # Each thumbnail is the media URL with the size name set as the `name` query parameter
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries for the tweet's card, if any; what is yielded depends on the card name."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # The value is stored under "<type>_value", where <type> is the lowercased `type` field
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        # Only the part after the last ':' of the card name is used for dispatch
        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            # The unified card is a JSON string whose media entities are handled like regular media
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                # Skip missing images and placeholder images
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # All non-photo media of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # selected_index is 1-based; it indexes all media here, photos included
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    # Per-entry data overrides the shared metadata (e.g. 'id'); the tweet id is kept as display_id
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media found: fall back to the first expanded URL of the tweet, unless it is this very URL
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            # Only reached if raise_no_formats returned instead of raising
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    # The first entry also records an archive id built from the tweet id
    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    # Several entries: number the titles so they can be told apart
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1551
1552
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for videos hosted on amp.twimg.com ("Twitter Amplify")."""

    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's `twitter:*` meta tags.

        @param url  Amplify video URL matched by `_VALID_URL`
        @returns    Info dict with `id`, `title`, `formats` and `thumbnails`
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the (width, height) pair from the `twitter:<target>:width/height` meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # The VMAP lookup may yield no formats; indexing an empty list would raise
        # IndexError here, so only annotate the first format when there is one
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1608
1609
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for Twitter broadcast pages (`/i/broadcasts/<id>`)."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract a broadcast's metadata and, unless it is still upcoming, its formats.

        @param url  Broadcast URL matched by `_VALID_URL`
        @returns    Info dict; `formats` is omitted when `live_status` is 'is_upcoming'
        @raises ExtractorError  if the API has no data for the broadcast
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast take precedence over the parsed defaults
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The `type` query parameter of the playlist URL, if any, becomes the m3u8 id
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = url_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1685
1686
1687 class TwitterSpacesIE(TwitterBaseIE):
1688 IE_NAME = 'twitter:spaces'
1689 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1690
1691 _TESTS = [{
1692 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1693 'info_dict': {
1694 'id': '1RDxlgyvNXzJL',
1695 'ext': 'm4a',
1696 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1697 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1698 'uploader': r're:Lucio Di Gaetano.*?',
1699 'uploader_id': 'luciodigaetano',
1700 'live_status': 'was_live',
1701 'timestamp': 1659877956,
1702 'upload_date': '20220807',
1703 'release_timestamp': 1659904215,
1704 'release_date': '20220807',
1705 },
1706 'params': {'skip_download': 'm3u8'},
1707 }, {
1708 # post_live/TimedOut but downloadable
1709 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
1710 'info_dict': {
1711 'id': '1vAxRAVQWONJl',
1712 'ext': 'm4a',
1713 'title': 'Framing Up FinOps: Billing Tools',
1714 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
1715 'uploader': 'Google Cloud',
1716 'uploader_id': 'googlecloud',
1717 'live_status': 'post_live',
1718 'timestamp': 1681409554,
1719 'upload_date': '20230413',
1720 'release_timestamp': 1681839000,
1721 'release_date': '20230418',
1722 },
1723 'params': {'skip_download': 'm3u8'},
1724 }, {
1725 # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
1726 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
1727 'info_dict': {
1728 'id': '1eaKbrQbjoRKX',
1729 'ext': 'm4a',
1730 'title': 'あ',
1731 'description': 'Twitter Space participated by nobody yet',
1732 'uploader': '息根とめる🔪Twitchで復活',
1733 'uploader_id': 'tomeru_ikinone',
1734 'live_status': 'was_live',
1735 'timestamp': 1685617198,
1736 'upload_date': '20230601',
1737 },
1738 'params': {'skip_download': 'm3u8'},
1739 }]
1740
1741 SPACE_STATUS = {
1742 'notstarted': 'is_upcoming',
1743 'ended': 'was_live',
1744 'running': 'is_live',
1745 'timedout': 'post_live',
1746 }
1747
def _build_graphql_query(self, space_id):
    """Return the GraphQL query payload for the Space identified by `space_id`.

    The result is a dict with two keys: 'variables' (which carries the
    Space ID and the query options) and 'features' (the feature switches).
    """
    query_variables = {
        'id': space_id,
        'isMetatagsQuery': True,
        'withDownvotePerspective': False,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withReplays': True,
        'withSuperFollowsUserFields': True,
        'withSuperFollowsTweetFields': True,
    }
    feature_switches = {
        'dont_mention_me_view_api_enabled': True,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_uc_gql_enabled': True,
        'spaces_2022_h2_clipping': True,
        'spaces_2022_h2_spaces_communities': False,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'vibe_api_enabled': True,
    }
    return {'variables': query_variables, 'features': feature_switches}
1773
def _real_extract(self, url):
    """Extract metadata and audio formats for the Twitter Space at `url`.

    Requires a logged-in session. Raises ExtractorError when the GraphQL
    response carries no Space data, and reports "no formats" when the
    Space has not started, has ended without a replay, or has timed out
    without a playable stream.
    """
    space_id = self._match_id(url)
    if not self.is_logged_in:
        self.raise_login_required('Twitter Spaces require authentication')

    space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
    if not space_data:
        raise ExtractorError('Twitter Space not found', expected=True)

    metadata = space_data['metadata']
    # An unknown or missing state yields live_status = None
    live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
    is_live = live_status == 'is_live'

    formats = []
    headers = {'Referer': 'https://twitter.com/'}
    media_key = metadata.get('media_key')

    if live_status == 'is_upcoming':
        self.raise_no_formats('Twitter Space not started yet', expected=True)
    elif not (is_live or metadata.get('is_space_available_for_replay')):
        self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
    elif media_key:
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        # First valid URL among the two candidate fields of 'source'
        playback_url = traverse_obj(
            stream_status,
            ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
        if playback_url:
            # XXX: Some Spaces need ffmpeg as downloader
            formats = self._extract_m3u8_formats(
                playback_url, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                headers=headers, fatal=False)

        # Spaces are audio-only; finished ones additionally get a container hint
        format_overrides = {'vcodec': 'none', 'acodec': 'aac'}
        if not is_live:
            format_overrides['container'] = 'm4a_dash'
        for audio_format in formats:
            audio_format.update(format_overrides)

    speaker_names = traverse_obj(
        space_data, ('participants', 'speakers', ..., 'display_name'))
    participants = ', '.join(speaker_names) or 'nobody yet'

    if live_status == 'post_live' and not formats:
        self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

    creator_path = ('creator_results', 'result', 'legacy')
    return {
        'id': space_id,
        'title': metadata.get('title'),
        'description': f'Twitter Space participated by {participants}',
        'uploader': traverse_obj(metadata, (*creator_path, 'name')),
        'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
        'live_status': live_status,
        # Source timestamps are divided by 1000 (scale=1000)
        'release_timestamp': try_call(
            lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
        'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
        'formats': formats,
        'http_headers': headers,
    }
1825
1826
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve `t.co` short links (or `tco:<id>` pseudo-URLs) to their target URL."""
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'
    # Prefix of the "unsafe link" interstitial; the real target follows it
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Request the short link and hand the response's final URL to `url_result`.

        If that URL is the unsafe-link warning page, the warning prefix is
        stripped so that the wrapped target is returned instead.
        """
        mobj = self._match_valid_url(url)
        eid, link_id = mobj.group('eid', 'id')
        if eid:
            # `tco:<id>` form: rebuild the regular t.co URL from the bare ID
            link_id = eid
            url = self._BASE_URL + link_id
        # NOTE(review): the 'curl' User-Agent is presumably needed to get a plain
        # HTTP redirect from t.co -- confirm before changing it
        new_url = self._request_webpage(url, link_id, headers={'User-Agent': 'curl'}).url
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Slice off only the leading prefix; str.replace would also drop
            # any later occurrence of the same text inside the target URL
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)