]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
932b478d44bf0243afcc6ba9e39162b7fe22ede3
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import random
3 import re
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import functools # isort: split
8 from ..compat import (
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..utils import (
14 ExtractorError,
15 dict_get,
16 filter_dict,
17 float_or_none,
18 format_field,
19 int_or_none,
20 make_archive_id,
21 remove_end,
22 str_or_none,
23 strip_or_none,
24 traverse_obj,
25 try_call,
26 try_get,
27 unified_timestamp,
28 update_url_query,
29 url_or_none,
30 xpath_text,
31 )
32
33
class TwitterBaseIE(InfoExtractor):
    # Shared constants and helpers (format extraction, login, API access) for the
    # Twitter extractors defined in this file.

    # NOTE(review): presumably the machine name looked up for credentials by the
    # base extractor - the consumer is not visible in this file, confirm
    _NETRC_MACHINE = 'twitter'
    # Prefix for the v1.1 REST endpoints (guest/activate.json, onboarding/task.json
    # and the non-GraphQL calls made by _call_api)
    _API_BASE = 'https://api.twitter.com/1.1/'
    # Prefix used by _call_api when graphql=True
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # Scheme + host part shared by the _VALID_URL patterns of the extractors below:
    # twitter.com with an optional www./m./mobile. subdomain, or the .onion host
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens; _set_base_headers picks _LEGACY_AUTH only for legacy requests
    # made while not logged in, and _AUTH otherwise
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api after every successful login step and echoed
    # back in the next login request body
    _flow_token = None

    # Request body (compact JSON, encoded to bytes) sent by _perform_login with the
    # first onboarding/task.json call that starts the login flow
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()
96
97 def _extract_variant_formats(self, variant, video_id):
98 variant_url = variant.get('url')
99 if not variant_url:
100 return [], {}
101 elif '.m3u8' in variant_url:
102 return self._extract_m3u8_formats_and_subtitles(
103 variant_url, video_id, 'mp4', 'm3u8_native',
104 m3u8_id='hls', fatal=False)
105 else:
106 tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
107 f = {
108 'url': variant_url,
109 'format_id': 'http' + ('-%d' % tbr if tbr else ''),
110 'tbr': tbr,
111 }
112 self._search_dimensions_in_video_url(f, variant_url)
113 return [f], {}
114
def _extract_formats_from_vmap_url(self, vmap_url, video_id):
    """Download a VMAP XML document and collect formats/subtitles from it.

    Returns ``([], {})`` when ``vmap_url`` does not pass ``url_or_none``.
    Every ``videoVariant`` element contributes its (URL-unquoted) ``url``
    attribute; the ``MediaFile`` text is processed as an extra variant unless
    its URL was already seen among the variants.
    """
    vmap_url = url_or_none(vmap_url)
    if not vmap_url:
        return [], {}

    vmap_doc = self._download_xml(vmap_url, video_id)
    all_formats, all_subtitles, seen_urls = [], {}, []

    variant_path = './/{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'
    for node in vmap_doc.findall(variant_path):
        attributes = node.attrib
        # The attribute is rewritten in place so that the variant mapping passed
        # on below carries the decoded URL
        attributes['url'] = compat_urllib_parse_unquote(attributes['url'])
        seen_urls.append(attributes['url'])
        variant_formats, variant_subs = self._extract_variant_formats(attributes, video_id)
        all_formats.extend(variant_formats)
        all_subtitles = self._merge_subtitles(all_subtitles, variant_subs)

    media_file_url = strip_or_none(xpath_text(vmap_doc, './/MediaFile'))
    if media_file_url not in seen_urls:
        extra_formats, extra_subs = self._extract_variant_formats(
            {'url': media_file_url}, video_id)
        all_formats.extend(extra_formats)
        all_subtitles = self._merge_subtitles(all_subtitles, extra_subs)

    return all_formats, all_subtitles
137
138 @staticmethod
139 def _search_dimensions_in_video_url(a_format, video_url):
140 m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
141 if m:
142 a_format.update({
143 'width': int(m.group('width')),
144 'height': int(m.group('height')),
145 })
146
@property
def is_logged_in(self):
    """True when the cookies obtained for ``_API_BASE`` include a truthy ``auth_token``."""
    api_cookies = self._get_cookies(self._API_BASE)
    auth_cookie = api_cookies.get('auth_token')
    return bool(auth_cookie)
150
@functools.cached_property
def _selected_api(self):
    """First value of the ``api`` configuration argument (default ``'graphql'``).

    The argument is always looked up under the ``Twitter`` extractor key and the
    result is cached on the instance.
    """
    api_choices = self._configuration_arg('api', ['graphql'], ie_key='Twitter')
    return api_choices[0]
154
def _fetch_guest_token(self, display_id):
    """Request a guest token from ``guest/activate.json`` and return it as a string.

    Legacy authorization headers are requested only when ``display_id`` is
    truthy and the selected API is ``'legacy'``.

    Raises ExtractorError when the response carries no string ``guest_token``.
    """
    wants_legacy = display_id and self._selected_api == 'legacy'
    response = self._download_json(
        f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token',
        data=b'', headers=self._set_base_headers(legacy=wants_legacy))
    guest_token = traverse_obj(response, ('guest_token', {str}))
    if guest_token:
        return guest_token
    raise ExtractorError('Could not retrieve guest token')
163
def _set_base_headers(self, legacy=False):
    """Build the ``Authorization`` / ``x-csrf-token`` headers for an API request.

    The legacy bearer token is used only for ``legacy`` requests made while not
    logged in. The CSRF value is read from the ``ct0`` cookie via ``try_call``;
    the dict is passed through ``filter_dict`` before being returned
    (presumably dropping the entry when the cookie is absent - confirm).
    """
    if legacy and not self.is_logged_in:
        bearer_token = self._LEGACY_AUTH
    else:
        bearer_token = self._AUTH

    def read_csrf_cookie():
        return self._get_cookies(self._API_BASE)['ct0'].value

    return filter_dict({
        'Authorization': f'Bearer {bearer_token}',
        'x-csrf-token': try_call(read_csrf_cookie),
    })
170
def _call_login_api(self, note, headers, query={}, data=None):
    """Perform one step of the login flow against ``onboarding/task.json``.

    HTTP 400 responses are accepted so that the API's error message can be
    reported. On success the response's ``flow_token`` is stored on
    ``self._flow_token`` and the first string ``subtask_id`` is returned.

    Raises ExtractorError when the API reports an error, when the status is
    not ``'success'`` or when no next subtask is present.
    """
    response = self._download_json(
        f'{self._API_BASE}onboarding/task.json', None, note,
        headers=headers, query=query, data=data, expected_status=400)

    api_error = traverse_obj(response, ('errors', 0, 'message', {str}))
    if api_error:
        raise ExtractorError(f'Login failed, Twitter API says: {api_error}', expected=True)
    if traverse_obj(response, 'status') != 'success':
        raise ExtractorError('Login was unsuccessful')

    next_subtask_id = traverse_obj(
        response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
    if not next_subtask_id:
        raise ExtractorError('Twitter API did not return next login subtask')

    # Only remember the token once the whole response has been validated
    self._flow_token = response['flow_token']
    return next_subtask_id
189
def _perform_login(self, username, password):
    """Log in with ``username``/``password`` by walking the login subtask flow.

    Does nothing when already logged in. Otherwise a guest token is taken from
    the homepage (or fetched from the API), the flow is started with
    ``_LOGIN_INIT_DATA`` and each subtask returned by the API is answered until
    ``is_logged_in`` becomes true.

    Captcha and rejected logins are reported through ``raise_login_required``;
    an unknown subtask, or reaching the success subtask without being logged
    in, raises ExtractorError.
    """
    if self.is_logged_in:
        return

    homepage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
    guest_token = self._search_regex(
        r'\.cookie\s*=\s*["\']gt=(\d+);', homepage, 'gt', default=None)
    if not guest_token:
        guest_token = self._fetch_guest_token(None)

    login_headers = {
        **self._set_base_headers(),
        'content-type': 'application/json',
        'x-guest-token': guest_token,
        'x-twitter-client-language': 'en',
        'x-twitter-active-user': 'yes',
        'Referer': 'https://twitter.com/',
        'Origin': 'https://twitter.com',
    }

    def submit(note, *subtask_inputs):
        # The flow token is read at submission time, i.e. the one stored by the
        # previous _call_login_api call
        body = json.dumps({
            'flow_token': self._flow_token,
            'subtask_inputs': subtask_inputs
        }, separators=(',', ':')).encode()
        return self._call_login_api(note, login_headers, data=body)

    def text_input(subtask_id, text):
        return {
            'subtask_id': subtask_id,
            'enter_text': {
                'text': text,
                'link': 'next_link'
            }
        }

    subtask = self._call_login_api(
        'Downloading flow token', login_headers,
        query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

    while not self.is_logged_in:
        if subtask == 'LoginJsInstrumentationSubtask':
            subtask = submit('Submitting JS instrumentation response', {
                'subtask_id': subtask,
                'js_instrumentation': {
                    'response': '{}',
                    'link': 'next_link'
                }
            })

        elif subtask == 'LoginEnterUserIdentifierSSO':
            subtask = submit('Submitting username', {
                'subtask_id': subtask,
                'settings_list': {
                    'setting_responses': [{
                        'key': 'user_identifier',
                        'response_data': {
                            'text_data': {
                                'result': username
                            }
                        }
                    }],
                    'link': 'next_link'
                }
            })

        elif subtask == 'LoginEnterAlternateIdentifierSubtask':
            subtask = submit('Submitting alternate identifier', text_input(
                subtask, self._get_tfa_info(
                    'one of username, phone number or email that was not used as --username')))

        elif subtask == 'LoginEnterPassword':
            subtask = submit('Submitting password', {
                'subtask_id': subtask,
                'enter_password': {
                    'password': password,
                    'link': 'next_link'
                }
            })

        elif subtask == 'AccountDuplicationCheck':
            subtask = submit('Submitting account duplication check', {
                'subtask_id': subtask,
                'check_logged_in_account': {
                    'link': 'AccountDuplicationCheck_false'
                }
            })

        elif subtask == 'LoginTwoFactorAuthChallenge':
            subtask = submit('Submitting 2FA token', text_input(
                subtask, self._get_tfa_info('two-factor authentication token')))

        elif subtask == 'LoginAcid':
            subtask = submit('Submitting confirmation code', text_input(
                subtask, self._get_tfa_info('confirmation code sent to your email or phone')))

        elif subtask == 'ArkoseLogin':
            self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

        elif subtask == 'DenyLoginSubtask':
            self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

        elif subtask == 'LoginSuccessSubtask':
            # The loop condition already told us the auth cookie is still missing
            raise ExtractorError('Twitter API did not grant auth token cookie')

        else:
            raise ExtractorError(f'Unrecognized subtask ID "{subtask}"')

    self.report_login()
301
def _call_api(self, path, video_id, query={}, graphql=False):
    """Query the legacy (v1.1) or GraphQL API and return the decoded JSON.

    ``path`` is appended to ``_GRAPHQL_API_BASE`` when ``graphql`` is true and
    to ``_API_BASE`` otherwise. Logged-in sessions send the OAuth2 session
    headers; anonymous ones fetch and send a guest token instead.

    Raises ExtractorError when the response contains ``errors``; messages
    mentioning "not authorized" are routed through ``raise_login_required``.
    """
    headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
    if self.is_logged_in:
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        })
    else:
        headers['x-guest-token'] = self._fetch_guest_token(video_id)

    # These statuses still carry a JSON body whose "errors" we want to report
    allowed_status = {400, 401, 403, 404} if graphql else {403}
    result = self._download_json(
        (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
        video_id, headers=headers, query=query, expected_status=allowed_status,
        note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

    if result.get('errors'):
        messages = traverse_obj(result, ('errors', ..., 'message', {str})) or []
        # De-duplicate while keeping the order in which the API listed the
        # messages: joining a set() made the user-facing text vary from run to
        # run because string hashing is randomised per process
        errors = ', '.join(dict.fromkeys(messages))
        if 'not authorized' in errors:
            self.raise_login_required(remove_end(errors, '.'))
        raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

    return result
324
325 def _build_graphql_query(self, media_id):
326 raise NotImplementedError('Method must be implemented to support GraphQL')
327
def _call_graphql_api(self, endpoint, media_id):
    """Call a GraphQL ``endpoint`` for ``media_id`` and return the response's ``data``.

    Each value produced by ``_build_graphql_query`` is serialised to compact
    JSON and sent as a query-string parameter under its key.
    """
    query = {}
    for name, payload in self._build_graphql_query(media_id).items():
        query[name] = json.dumps(payload, separators=(',', ':'))
    response = self._call_api(endpoint, media_id, query=query, graphql=True)
    return traverse_obj(response, 'data')
332
333
class TwitterCardIE(InfoExtractor):
    # Handles card / embedded-video URLs; _real_extract only forwards the numeric
    # ID to TwitterIE as a statuses/<id> URL
    IE_NAME = 'twitter:card'
    # Matches i/cards/tfw/v1/<id>, i/videos/<id> and i/videos/tweet/<id>
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    # Test fixtures: one dict per test URL with the expected metadata
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]
452
def _real_extract(self, url):
    """Delegate a card/video URL to TwitterIE through its ``statuses/<id>`` URL."""
    tweet_id = self._match_id(url)
    status_url = f'https://twitter.com/statuses/{tweet_id}'
    return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
458
459
460 class TwitterIE(TwitterBaseIE):
461 IE_NAME = 'twitter'
462 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
463
464 _TESTS = [{
465 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
466 'info_dict': {
467 'id': '643211870443208704',
468 'display_id': '643211948184596480',
469 'ext': 'mp4',
470 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
471 'thumbnail': r're:^https?://.*\.jpg',
472 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
473 'uploader': 'FREE THE NIPPLE',
474 'uploader_id': 'freethenipple',
475 'duration': 12.922,
476 'timestamp': 1442188653,
477 'upload_date': '20150913',
478 'uploader_url': 'https://twitter.com/freethenipple',
479 'comment_count': int,
480 'repost_count': int,
481 'like_count': int,
482 'tags': [],
483 'age_limit': 18,
484 '_old_archive_ids': ['twitter 643211948184596480'],
485 },
486 }, {
487 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
488 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
489 'info_dict': {
490 'id': '657991469417025536',
491 'ext': 'mp4',
492 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
493 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
494 'thumbnail': r're:^https?://.*\.png',
495 'uploader': 'Gifs',
496 'uploader_id': 'giphz',
497 },
498 'expected_warnings': ['height', 'width'],
499 'skip': 'Account suspended',
500 }, {
501 'url': 'https://twitter.com/starwars/status/665052190608723968',
502 'info_dict': {
503 'id': '665052190608723968',
504 'display_id': '665052190608723968',
505 'ext': 'mp4',
506 'title': r're:Star Wars.*A new beginning is coming December 18.*',
507 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
508 'uploader_id': 'starwars',
509 'uploader': r're:Star Wars.*',
510 'timestamp': 1447395772,
511 'upload_date': '20151113',
512 'uploader_url': 'https://twitter.com/starwars',
513 'comment_count': int,
514 'repost_count': int,
515 'like_count': int,
516 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
517 'age_limit': 0,
518 '_old_archive_ids': ['twitter 665052190608723968'],
519 },
520 }, {
521 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
522 'info_dict': {
523 'id': '705235433198714880',
524 'ext': 'mp4',
525 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
526 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
527 'uploader_id': 'BTNBrentYarina',
528 'uploader': 'Brent Yarina',
529 'timestamp': 1456976204,
530 'upload_date': '20160303',
531 'uploader_url': 'https://twitter.com/BTNBrentYarina',
532 'comment_count': int,
533 'repost_count': int,
534 'like_count': int,
535 'tags': [],
536 'age_limit': 0,
537 },
538 'params': {
539 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
540 # Test case of TwitterCardIE
541 'skip_download': True,
542 },
543 'skip': 'Dead external link',
544 }, {
545 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
546 'info_dict': {
547 'id': '700207414000242688',
548 'display_id': '700207533655363584',
549 'ext': 'mp4',
550 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
551 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
552 'thumbnail': r're:^https?://.*\.jpg',
553 'uploader': 'jaydin donte geer',
554 'uploader_id': 'jaydingeer',
555 'duration': 30.0,
556 'timestamp': 1455777459,
557 'upload_date': '20160218',
558 'uploader_url': 'https://twitter.com/jaydingeer',
559 'comment_count': int,
560 'repost_count': int,
561 'like_count': int,
562 'tags': ['Damndaniel'],
563 'age_limit': 0,
564 '_old_archive_ids': ['twitter 700207533655363584'],
565 },
566 }, {
567 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
568 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
569 'info_dict': {
570 'id': 'MIOxnrUteUd',
571 'ext': 'mp4',
572 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
573 'uploader': 'TAKUMA',
574 'uploader_id': '1004126642786242560',
575 'timestamp': 1402826626,
576 'upload_date': '20140615',
577 'thumbnail': r're:^https?://.*\.jpg',
578 'alt_title': 'Vine by TAKUMA',
579 'comment_count': int,
580 'repost_count': int,
581 'like_count': int,
582 'view_count': int,
583 },
584 'add_ie': ['Vine'],
585 }, {
586 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
587 'info_dict': {
588 'id': '717462543795523584',
589 'display_id': '719944021058060289',
590 'ext': 'mp4',
591 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
592 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
593 'uploader_id': 'CaptainAmerica',
594 'uploader': 'Captain America',
595 'duration': 3.17,
596 'timestamp': 1460483005,
597 'upload_date': '20160412',
598 'uploader_url': 'https://twitter.com/CaptainAmerica',
599 'thumbnail': r're:^https?://.*\.jpg',
600 'comment_count': int,
601 'repost_count': int,
602 'like_count': int,
603 'tags': [],
604 'age_limit': 0,
605 '_old_archive_ids': ['twitter 719944021058060289'],
606 },
607 }, {
608 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
609 'info_dict': {
610 'id': '1zqKVVlkqLaKB',
611 'ext': 'mp4',
612 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
613 'upload_date': '20160923',
614 'uploader_id': '1PmKqpJdOJQoY',
615 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
616 'timestamp': 1474613214,
617 'thumbnail': r're:^https?://.*\.jpg',
618 },
619 'add_ie': ['Periscope'],
620 'skip': 'Broadcast not found',
621 }, {
622 # has mp4 formats via mobile API
623 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
624 'info_dict': {
625 'id': '852077943283097602',
626 'ext': 'mp4',
627 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
628 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
629 'uploader': 'عالم الأخبار',
630 'uploader_id': 'news_al3alm',
631 'duration': 277.4,
632 'timestamp': 1492000653,
633 'upload_date': '20170412',
634 'display_id': '852138619213144067',
635 'age_limit': 0,
636 'uploader_url': 'https://twitter.com/news_al3alm',
637 'thumbnail': r're:^https?://.*\.jpg',
638 'tags': [],
639 'repost_count': int,
640 'like_count': int,
641 'comment_count': int,
642 '_old_archive_ids': ['twitter 852138619213144067'],
643 },
644 }, {
645 'url': 'https://twitter.com/i/web/status/910031516746514432',
646 'info_dict': {
647 'id': '910030238373089285',
648 'display_id': '910031516746514432',
649 'ext': 'mp4',
650 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
651 'thumbnail': r're:^https?://.*\.jpg',
652 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
653 'uploader': 'Préfet de Guadeloupe',
654 'uploader_id': 'Prefet971',
655 'duration': 47.48,
656 'timestamp': 1505803395,
657 'upload_date': '20170919',
658 'uploader_url': 'https://twitter.com/Prefet971',
659 'comment_count': int,
660 'repost_count': int,
661 'like_count': int,
662 'tags': ['Maria'],
663 'age_limit': 0,
664 '_old_archive_ids': ['twitter 910031516746514432'],
665 },
666 'params': {
667 'skip_download': True, # requires ffmpeg
668 },
669 }, {
670 # card via api.twitter.com/1.1/videos/tweet/config
671 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
672 'info_dict': {
673 'id': '1001551417340022785',
674 'display_id': '1001551623938805763',
675 'ext': 'mp4',
676 'title': 're:.*?Shep is on a roll today.*?',
677 'thumbnail': r're:^https?://.*\.jpg',
678 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
679 'uploader': 'Lis Power',
680 'uploader_id': 'LisPower1',
681 'duration': 111.278,
682 'timestamp': 1527623489,
683 'upload_date': '20180529',
684 'uploader_url': 'https://twitter.com/LisPower1',
685 'comment_count': int,
686 'repost_count': int,
687 'like_count': int,
688 'tags': [],
689 'age_limit': 0,
690 '_old_archive_ids': ['twitter 1001551623938805763'],
691 },
692 'params': {
693 'skip_download': True, # requires ffmpeg
694 },
695 }, {
696 'url': 'https://twitter.com/foobar/status/1087791357756956680',
697 'info_dict': {
698 'id': '1087791272830607360',
699 'display_id': '1087791357756956680',
700 'ext': 'mp4',
701 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
702 'thumbnail': r're:^https?://.*\.jpg',
703 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
704 'uploader': 'X',
705 'uploader_id': 'X',
706 'duration': 61.567,
707 'timestamp': 1548184644,
708 'upload_date': '20190122',
709 'uploader_url': 'https://twitter.com/X',
710 'comment_count': int,
711 'repost_count': int,
712 'like_count': int,
713 'view_count': int,
714 'tags': [],
715 'age_limit': 0,
716 },
717 'skip': 'This Tweet is unavailable',
718 }, {
719 # not available in Periscope
720 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
721 'info_dict': {
722 'id': '1vOGwqejwoWxB',
723 'ext': 'mp4',
724 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
725 'uploader': 'Vivi',
726 'uploader_id': '1eVjYOLGkGrQL',
727 'thumbnail': r're:^https?://.*\.jpg',
728 'tags': ['EduTECH2019'],
729 'view_count': int,
730 },
731 'add_ie': ['TwitterBroadcast'],
732 'skip': 'Broadcast no longer exists',
733 }, {
734 # unified card
735 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
736 'info_dict': {
737 'id': '1349774757969989634',
738 'display_id': '1349794411333394432',
739 'ext': 'mp4',
740 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
741 'thumbnail': r're:^https?://.*\.jpg',
742 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
743 'uploader': 'Brooklyn Nets',
744 'uploader_id': 'BrooklynNets',
745 'duration': 324.484,
746 'timestamp': 1610651040,
747 'upload_date': '20210114',
748 'uploader_url': 'https://twitter.com/BrooklynNets',
749 'comment_count': int,
750 'repost_count': int,
751 'like_count': int,
752 'tags': [],
753 'age_limit': 0,
754 '_old_archive_ids': ['twitter 1349794411333394432'],
755 },
756 'params': {
757 'skip_download': True,
758 },
759 }, {
760 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
761 'info_dict': {
762 'id': '1577855447914409984',
763 'display_id': '1577855540407197696',
764 'ext': 'mp4',
765 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
766 'description': 'md5:b9c3699335447391d11753ab21c70a74',
767 'upload_date': '20221006',
768 'uploader': 'oshtru',
769 'uploader_id': 'oshtru',
770 'uploader_url': 'https://twitter.com/oshtru',
771 'thumbnail': r're:^https?://.*\.jpg',
772 'duration': 30.03,
773 'timestamp': 1665025050,
774 'comment_count': int,
775 'repost_count': int,
776 'like_count': int,
777 'tags': [],
778 'age_limit': 0,
779 '_old_archive_ids': ['twitter 1577855540407197696'],
780 },
781 'params': {'skip_download': True},
782 }, {
783 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
784 'info_dict': {
785 'id': '1577719286659006464',
786 'title': 'Ultima - Test',
787 'description': 'Test https://t.co/Y3KEZD7Dad',
788 'uploader': 'Ultima',
789 'uploader_id': 'UltimaShadowX',
790 'uploader_url': 'https://twitter.com/UltimaShadowX',
791 'upload_date': '20221005',
792 'timestamp': 1664992565,
793 'comment_count': int,
794 'repost_count': int,
795 'like_count': int,
796 'tags': [],
797 'age_limit': 0,
798 },
799 'playlist_count': 4,
800 'params': {'skip_download': True},
801 }, {
802 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
803 'info_dict': {
804 'id': '1575559336759263233',
805 'display_id': '1575560063510810624',
806 'ext': 'mp4',
807 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
808 'thumbnail': r're:^https?://.*\.jpg',
809 'description': 'md5:95aea692fda36a12081b9629b02daa92',
810 'uploader': 'Max Olson',
811 'uploader_id': 'MesoMax919',
812 'uploader_url': 'https://twitter.com/MesoMax919',
813 'duration': 21.321,
814 'timestamp': 1664477766,
815 'upload_date': '20220929',
816 'comment_count': int,
817 'repost_count': int,
818 'like_count': int,
819 'tags': ['HurricaneIan'],
820 'age_limit': 0,
821 '_old_archive_ids': ['twitter 1575560063510810624'],
822 },
823 }, {
824 # Adult content, fails if not logged in
825 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
826 'info_dict': {
827 'id': '1575199163847000068',
828 'display_id': '1575199173472927762',
829 'ext': 'mp4',
830 'title': str,
831 'description': str,
832 'uploader': str,
833 'uploader_id': 'Rizdraws',
834 'uploader_url': 'https://twitter.com/Rizdraws',
835 'upload_date': '20220928',
836 'timestamp': 1664391723,
837 'thumbnail': r're:^https?://.+\.jpg',
838 'like_count': int,
839 'repost_count': int,
840 'comment_count': int,
841 'age_limit': 18,
842 'tags': []
843 },
844 'params': {'skip_download': 'The media could not be played'},
845 'skip': 'Requires authentication',
846 }, {
847 # Playlist result only with graphql API
848 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
849 'playlist_mincount': 2,
850 'info_dict': {
851 'id': '1395079556562706435',
852 'title': str,
853 'tags': [],
854 'uploader': str,
855 'like_count': int,
856 'upload_date': '20210519',
857 'age_limit': 0,
858 'repost_count': int,
859 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
860 'uploader_id': 'Srirachachau',
861 'comment_count': int,
862 'uploader_url': 'https://twitter.com/Srirachachau',
863 'timestamp': 1621447860,
864 },
865 }, {
866 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
867 'playlist_mincount': 2,
868 'info_dict': {
869 'id': '1578353380363501568',
870 'title': str,
871 'uploader_id': 'DavidToons_',
872 'repost_count': int,
873 'like_count': int,
874 'uploader': str,
875 'timestamp': 1665143744,
876 'uploader_url': 'https://twitter.com/DavidToons_',
877 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
878 'tags': [],
879 'comment_count': int,
880 'upload_date': '20221007',
881 'age_limit': 0,
882 },
883 }, {
884 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
885 'playlist_count': 2,
886 'info_dict': {
887 'id': '1578401165338976258',
888 'title': str,
889 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
890 'uploader': str,
891 'uploader_id': 'primevideouk',
892 'timestamp': 1665155137,
893 'upload_date': '20221007',
894 'age_limit': 0,
895 'uploader_url': 'https://twitter.com/primevideouk',
896 'comment_count': int,
897 'repost_count': int,
898 'like_count': int,
899 'tags': ['TheRingsOfPower'],
900 },
901 }, {
902 # Twitter Spaces
903 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
904 'info_dict': {
905 'id': '1lPJqmBeeNAJb',
906 'ext': 'm4a',
907 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
908 'uploader': r're:Monique Camarra.+?',
909 'uploader_id': 'MoniqueCamarra',
910 'live_status': 'was_live',
911 'release_timestamp': 1658417414,
912 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
913 'timestamp': 1658407771,
914 'release_date': '20220721',
915 'upload_date': '20220721',
916 },
917 'add_ie': ['TwitterSpaces'],
918 'params': {'skip_download': 'm3u8'},
919 'skip': 'Requires authentication',
920 }, {
921 # URL specifies video number but --yes-playlist
922 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
923 'playlist_mincount': 2,
924 'info_dict': {
925 'id': '1600649710662213632',
926 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
927 'timestamp': 1670459604.0,
928 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
929 'comment_count': int,
930 'uploader_id': 'CTVJLaidlaw',
931 'repost_count': int,
932 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
933 'upload_date': '20221208',
934 'age_limit': 0,
935 'uploader': 'Jocelyn Laidlaw',
936 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
937 'like_count': int,
938 },
939 }, {
940 # URL specifies video number and --no-playlist
941 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
942 'info_dict': {
943 'id': '1600649511827013632',
944 'ext': 'mp4',
945 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
946 'thumbnail': r're:^https?://.+\.jpg',
947 'timestamp': 1670459604.0,
948 'uploader_id': 'CTVJLaidlaw',
949 'uploader': 'Jocelyn Laidlaw',
950 'repost_count': int,
951 'comment_count': int,
952 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
953 'duration': 102.226,
954 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
955 'display_id': '1600649710662213632',
956 'like_count': int,
957 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
958 'upload_date': '20221208',
959 'age_limit': 0,
960 '_old_archive_ids': ['twitter 1600649710662213632'],
961 },
962 'params': {'noplaylist': True},
963 }, {
964 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
965 # note the id different between extraction and url
966 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
967 'info_dict': {
968 'id': '1621117577354424321',
969 'display_id': '1621117700482416640',
970 'ext': 'mp4',
971 'title': '뽀 - 아 최우제 이동속도 봐',
972 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
973 'duration': 24.598,
974 'uploader': '뽀',
975 'uploader_id': 's2FAKER',
976 'uploader_url': 'https://twitter.com/s2FAKER',
977 'upload_date': '20230202',
978 'timestamp': 1675339553.0,
979 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
980 'age_limit': 18,
981 'tags': [],
982 'like_count': int,
983 'repost_count': int,
984 'comment_count': int,
985 '_old_archive_ids': ['twitter 1621117700482416640'],
986 },
987 }, {
988 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
989 'info_dict': {
990 'id': '1599108643743473680',
991 'display_id': '1599108751385972737',
992 'ext': 'mp4',
993 'title': '\u06ea - \U0001F48B',
994 'uploader_url': 'https://twitter.com/hlo_again',
995 'like_count': int,
996 'uploader_id': 'hlo_again',
997 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
998 'repost_count': int,
999 'duration': 9.531,
1000 'comment_count': int,
1001 'upload_date': '20221203',
1002 'age_limit': 0,
1003 'timestamp': 1670092210.0,
1004 'tags': [],
1005 'uploader': '\u06ea',
1006 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1007 '_old_archive_ids': ['twitter 1599108751385972737'],
1008 },
1009 'params': {'noplaylist': True},
1010 }, {
1011 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1012 'info_dict': {
1013 'id': '1600009362759733248',
1014 'display_id': '1600009574919962625',
1015 'ext': 'mp4',
1016 'uploader_url': 'https://twitter.com/MunTheShinobi',
1017 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1018 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1019 'age_limit': 0,
1020 'uploader': 'Mün',
1021 'repost_count': int,
1022 'upload_date': '20221206',
1023 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1024 'comment_count': int,
1025 'like_count': int,
1026 'tags': [],
1027 'uploader_id': 'MunTheShinobi',
1028 'duration': 139.987,
1029 'timestamp': 1670306984.0,
1030 '_old_archive_ids': ['twitter 1600009574919962625'],
1031 },
1032 }, {
1033 # retweeted_status (private)
1034 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1035 'info_dict': {
1036 'id': '1623274794488659969',
1037 'display_id': '1623739803874349067',
1038 'ext': 'mp4',
1039 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1040 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1041 'uploader': 'Johnny Bullets',
1042 'uploader_id': 'Johnnybull3ts',
1043 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1044 'age_limit': 0,
1045 'tags': [],
1046 'duration': 8.033,
1047 'timestamp': 1675853859.0,
1048 'upload_date': '20230208',
1049 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1050 'like_count': int,
1051 'repost_count': int,
1052 },
1053 'skip': 'Protected tweet',
1054 }, {
1055 # retweeted_status
1056 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1057 'info_dict': {
1058 'id': '1694928337846538240',
1059 'ext': 'mp4',
1060 'display_id': '1695424220702888009',
1061 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1062 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1063 'uploader': 'Benny Johnson',
1064 'uploader_id': 'bennyjohnson',
1065 'uploader_url': 'https://twitter.com/bennyjohnson',
1066 'age_limit': 0,
1067 'tags': [],
1068 'duration': 45.001,
1069 'timestamp': 1692962814.0,
1070 'upload_date': '20230825',
1071 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1072 'like_count': int,
1073 'repost_count': int,
1074 'comment_count': int,
1075 '_old_archive_ids': ['twitter 1695424220702888009'],
1076 },
1077 }, {
1078 # retweeted_status w/ legacy API
1079 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1080 'info_dict': {
1081 'id': '1694928337846538240',
1082 'ext': 'mp4',
1083 'display_id': '1695424220702888009',
1084 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1085 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1086 'uploader': 'Benny Johnson',
1087 'uploader_id': 'bennyjohnson',
1088 'uploader_url': 'https://twitter.com/bennyjohnson',
1089 'age_limit': 0,
1090 'tags': [],
1091 'duration': 45.001,
1092 'timestamp': 1692962814.0,
1093 'upload_date': '20230825',
1094 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1095 'like_count': int,
1096 'repost_count': int,
1097 '_old_archive_ids': ['twitter 1695424220702888009'],
1098 },
1099 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1100 }, {
1101 # Broadcast embedded in tweet
1102 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1103 'info_dict': {
1104 'id': '1rmxPMjLzAXKN',
1105 'ext': 'mp4',
1106 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1107 'uploader': 'Jessica Dobson',
1108 'uploader_id': 'JessicaDobsonWX',
1109 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1110 'timestamp': 1701566398,
1111 'upload_date': '20231203',
1112 'live_status': 'was_live',
1113 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1114 'concurrent_view_count': int,
1115 'view_count': int,
1116 },
1117 'add_ie': ['TwitterBroadcast'],
1118 }, {
1119 # Animated gif and quote tweet video, with syndication API
1120 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1121 'playlist_mincount': 2,
1122 'info_dict': {
1123 'id': '1696256659889565950',
1124 'title': 'BAKOON - https://t.co/zom968d0a0',
1125 'description': 'https://t.co/zom968d0a0',
1126 'tags': [],
1127 'uploader': 'BAKOON',
1128 'uploader_id': 'BAKKOOONN',
1129 'uploader_url': 'https://twitter.com/BAKKOOONN',
1130 'age_limit': 18,
1131 'timestamp': 1693254077.0,
1132 'upload_date': '20230828',
1133 'like_count': int,
1134 },
1135 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
1136 'expected_warnings': ['Not all metadata'],
1137 }, {
1138 # "stale tweet" with typename "TweetWithVisibilityResults"
1139 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1140 'md5': '62b1e11cdc2cdd0e527f83adb081f536',
1141 'info_dict': {
1142 'id': '1724883339285544960',
1143 'ext': 'mp4',
1144 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1145 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1146 'display_id': '1724884212803834154',
1147 'uploader': 'Robert F. Kennedy Jr',
1148 'uploader_id': 'RobertKennedyJr',
1149 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1150 'upload_date': '20231115',
1151 'timestamp': 1700079417.0,
1152 'duration': 341.048,
1153 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1154 'tags': ['Kennedy24'],
1155 'repost_count': int,
1156 'like_count': int,
1157 'comment_count': int,
1158 'age_limit': 0,
1159 '_old_archive_ids': ['twitter 1724884212803834154'],
1160 },
1161 }, {
1162 # onion route
1163 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1164 'only_matching': True,
1165 }, {
1166 # Twitch Clip Embed
1167 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1168 'only_matching': True,
1169 }, {
1170 # promo_video_website card
1171 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1172 'only_matching': True,
1173 }, {
1174 # promo_video_convo card
1175 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1176 'only_matching': True,
1177 }, {
1178 # appplayer card
1179 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1180 'only_matching': True,
1181 }, {
1182 # video_direct_message card
1183 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1184 'only_matching': True,
1185 }, {
1186 # poll2choice_video card
1187 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1188 'only_matching': True,
1189 }, {
1190 # poll3choice_video card
1191 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1192 'only_matching': True,
1193 }, {
1194 # poll4choice_video card
1195 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1196 'only_matching': True,
1197 }]
1198
# Captures the run of digits between "_video/" and the following "/".
# _extract_status applies it to syndication variant URLs to derive a media "id_str".
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1200
@property
def _GRAPHQL_ENDPOINT(self):
    """GraphQL endpoint path used to fetch a tweet; it depends on whether we are logged in."""
    logged_in_endpoint = 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
    logged_out_endpoint = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
    return logged_in_endpoint if self.is_logged_in else logged_out_endpoint
1206
def _graphql_to_legacy(self, data, twid):
    """
    Reshape a GraphQL tweet response into a legacy-style status dict.

    `data` is the GraphQL payload and `twid` the ID of the requested tweet.
    Returns the tweet's "legacy" dict, updated with "user", "card",
    "quoted_status" and "retweeted_status" entries where the result has them.

    Raises ExtractorError when the result carries a tombstone or has the
    typename "TweetUnavailable" (login-required reasons are reported through
    raise_login_required).
    """
    if self.is_logged_in:
        # Pick the conversation entry whose entryId belongs to the requested tweet
        tweet_result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        tweet_result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    typename = tweet_result.get('__typename')
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet_result:
        tombstone_text = traverse_obj(tweet_result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = tweet_result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    if typename == 'TweetWithVisibilityResults':
        # Result for "stale tweet" needs additional transformation
        tweet_result = traverse_obj(tweet_result, ('tweet', {dict})) or {}

    legacy_status = tweet_result.get('legacy', {})
    merged_fields = traverse_obj(tweet_result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    legacy_status.update(merged_fields)

    # extra transformations needed since result does not match legacy format
    if legacy_status.get('retweeted_status'):
        retweeted_user = traverse_obj(legacy_status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        legacy_status['retweeted_status']['user'] = retweeted_user or {}

    # Turn the card's list of {"key": ..., "value": ...} items into a key -> value mapping
    card_bindings = {}
    for binding in traverse_obj(legacy_status, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        legacy_status['card']['binding_values'] = card_bindings

    return legacy_status
1254
def _build_graphql_query(self, media_id):
    """
    Build the GraphQL request payload for the tweet `media_id`.

    When logged in, the payload identifies the tweet via "focalTweetId";
    otherwise it uses "tweetId" and additionally carries "fieldToggles".
    """
    if not self.is_logged_in:
        return {
            'variables': {
                'tweetId': media_id,
                'withCommunity': False,
                'includePromotedContent': False,
                'withVoice': False,
            },
            'features': {
                'creator_subscriptions_tweet_preview_api_enabled': True,
                'tweetypie_unmention_optimization_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
                'view_counts_everywhere_api_enabled': True,
                'longform_notetweets_consumption_enabled': True,
                'responsive_web_twitter_article_tweet_consumption_enabled': False,
                'tweet_awards_web_tipping_enabled': False,
                'freedom_of_speech_not_reach_fetch_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
                'longform_notetweets_rich_text_read_enabled': True,
                'longform_notetweets_inline_media_enabled': True,
                'responsive_web_graphql_exclude_directive_enabled': True,
                'verified_phone_label_enabled': False,
                'responsive_web_media_download_video_enabled': False,
                'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
                'responsive_web_graphql_timeline_navigation_enabled': True,
                'responsive_web_enhance_cards_enabled': False,
            },
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    return {
        'variables': {
            'focalTweetId': media_id,
            'includePromotedContent': True,
            'with_rux_injections': False,
            'withBirdwatchNotes': True,
            'withCommunity': True,
            'withDownvotePerspective': False,
            'withQuickPromoteEligibilityTweetFields': True,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withSuperFollowsTweetFields': True,
            'withSuperFollowsUserFields': True,
            'withV2Timeline': True,
            'withVoice': True,
        },
        'features': {
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_graphql_timeline_navigation_enabled': False,
            'responsive_web_text_conversations_enabled': False,
            'responsive_web_uc_gql_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'tweetypie_unmention_optimization_enabled': True,
            'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
            'verified_phone_label_enabled': False,
            'vibe_api_enabled': True,
        },
    }
1319
def _extract_status(self, twid):
    """
    Fetch the tweet `twid` and return it as a legacy-style status dict.

    GraphQL is used when logged in or when it is the selected API; otherwise
    the "legacy" or "syndication" API is used as selected. If the status has a
    dict under "retweeted_status", that dict is returned in place of the outer
    status.

    Raises ExtractorError for an unknown API selection or when the syndication
    endpoint returns an empty response.
    """
    if self.is_logged_in or self._selected_api == 'graphql':
        graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
        status = self._graphql_to_legacy(graphql_data, twid)

    elif self._selected_api == 'legacy':
        legacy_query = {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }
        status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)

    elif self._selected_api == 'syndication':
        self.report_warning(
            'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
        syndication_query = {
            'id': twid,
            # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
            'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
        }
        status = self._download_json(
            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
            headers={'User-Agent': 'Googlebot'}, query=syndication_query)
        if not status:
            raise ExtractorError('Syndication endpoint returned empty JSON response')

        # Transform the result so its structure matches that of legacy/graphql
        media_details_path = ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})
        media_id_path = ('video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1)
        media = []
        for detail in traverse_obj(status, media_details_path):
            # Fall back to the tweet ID when no media ID can be read from the variant URLs
            detail['id_str'] = traverse_obj(detail, media_id_path, get_all=False) or twid
            media.append(detail)
        status['extended_entities'] = {'media': media}

    else:
        raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1357
def _real_extract(self, url):
    """
    Extract the media attached to a tweet.

    The tweet ID and the optional media number are read from `url`. The result is
    one of:
      * a single info dict (one entry, or one URL-specified video),
      * a playlist of all entries found in the tweet and its card,
      * a url_result for the tweet's first expanded URL when no entry was found,
      * the bare metadata dict after reporting that no video could be found.

    Raises ExtractorError when a specific media number is requested and that
    media is unavailable or is not a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Tweet-level metadata shared by every entry extracted below
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-media part of an info dict (formats, subtitles, thumbnails) from a media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries (url results or format dicts) described by the tweet's card; yields nothing without a card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under "<type>_value", keyed by its own "type" field
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Every non-photo media item of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        media_index = int(selected_index) - 1
        # Media numbers in the URL are 1-based. Anything below 1 is rejected explicitly, because
        # a negative index would otherwise count from the end of the media list and silently
        # select the wrong item instead of reporting the media as unavailable
        desired_obj = None if media_index < 0 else traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # Nothing embedded in the tweet: fall back to the first URL it links to, if there is one
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1540
1541
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for amp.twimg.com video pages, which describe their media through <meta> tags."""

    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """
        Download the page at `url` and build an info dict from its "twitter:*" meta tags.

        Formats come from the VMAP URL in "twitter:amplify:vmap"; the thumbnail and
        the player dimensions come from the "twitter:image" and "twitter:player" tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the "twitter:<target>:width" / ":height" meta tags as integers (None when absent)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Only annotate the first format when one exists; indexing an empty
        # list here would raise IndexError before the result can be returned
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1597
1598
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> URLs."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """
        Look up the broadcast named in `url` and return its info dict.

        Upcoming broadcasts are returned without formats. For all others the
        stream status is queried and HLS formats are attached.

        Raises ExtractorError when the API has no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Prefer the values carried by the broadcast object over the parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The "type" query parameter of the playlist URL (if any) is used as the format ID
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1674
1675
1676 class TwitterSpacesIE(TwitterBaseIE):
1677 IE_NAME = 'twitter:spaces'
1678 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1679
1680 _TESTS = [{
1681 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1682 'info_dict': {
1683 'id': '1RDxlgyvNXzJL',
1684 'ext': 'm4a',
1685 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1686 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1687 'uploader': r're:Lucio Di Gaetano.*?',
1688 'uploader_id': 'luciodigaetano',
1689 'live_status': 'was_live',
1690 'timestamp': 1659877956,
1691 'upload_date': '20220807',
1692 'release_timestamp': 1659904215,
1693 'release_date': '20220807',
1694 },
1695 'params': {'skip_download': 'm3u8'},
1696 }, {
1697 # post_live/TimedOut but downloadable
1698 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
1699 'info_dict': {
1700 'id': '1vAxRAVQWONJl',
1701 'ext': 'm4a',
1702 'title': 'Framing Up FinOps: Billing Tools',
1703 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
1704 'uploader': 'Google Cloud',
1705 'uploader_id': 'googlecloud',
1706 'live_status': 'post_live',
1707 'timestamp': 1681409554,
1708 'upload_date': '20230413',
1709 'release_timestamp': 1681839000,
1710 'release_date': '20230418',
1711 },
1712 'params': {'skip_download': 'm3u8'},
1713 }, {
1714 # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
1715 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
1716 'info_dict': {
1717 'id': '1eaKbrQbjoRKX',
1718 'ext': 'm4a',
1719 'title': 'あ',
1720 'description': 'Twitter Space participated by nobody yet',
1721 'uploader': '息根とめる🔪Twitchで復活',
1722 'uploader_id': 'tomeru_ikinone',
1723 'live_status': 'was_live',
1724 'timestamp': 1685617198,
1725 'upload_date': '20230601',
1726 },
1727 'params': {'skip_download': 'm3u8'},
1728 }]
1729
# Maps the lower-cased "state" from a Space's metadata to the live_status reported for it
SPACE_STATUS = {
    'notstarted': 'is_upcoming',
    'ended': 'was_live',
    'running': 'is_live',
    'timedout': 'post_live',
}
1736
def _build_graphql_query(self, space_id):
    """Build the GraphQL request payload ("variables" and "features") for the Space `space_id`."""
    variables = {
        'id': space_id,
        'isMetatagsQuery': True,
        'withDownvotePerspective': False,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withReplays': True,
        'withSuperFollowsUserFields': True,
        'withSuperFollowsTweetFields': True,
    }
    features = {
        'dont_mention_me_view_api_enabled': True,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_uc_gql_enabled': True,
        'spaces_2022_h2_clipping': True,
        'spaces_2022_h2_spaces_communities': False,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'vibe_api_enabled': True,
    }
    return {'variables': variables, 'features': features}
1762
def _real_extract(self, url):
    """Extract a Twitter Space (audio room) identified by ``url``.

    Requires a logged-in session. Raises ``ExtractorError`` when the
    GraphQL response carries no ``audioSpace`` data. When the Space has
    not started, has ended without replay, or yielded no formats while
    in ``post_live`` state, ``raise_no_formats`` is invoked with the
    matching message.

    Returns the info dict for the Space (possibly with empty ``formats``).
    """
    space_id = self._match_id(url)
    if not self.is_logged_in:
        self.raise_login_required('Twitter Spaces require authentication')

    space_data = self._call_graphql_api(
        'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
    if not space_data:
        raise ExtractorError('Twitter Space not found', expected=True)

    metadata = space_data['metadata']
    # Unknown or missing states make the lookup fail inside try_call -> None
    live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
    is_live = live_status == 'is_live'
    media_key = metadata.get('media_key')

    headers = {'Referer': 'https://twitter.com/'}
    formats = []
    if live_status == 'is_upcoming':
        self.raise_no_formats('Twitter Space not started yet', expected=True)
    elif not (is_live or metadata.get('is_space_available_for_replay')):
        self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
    elif media_key:
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        # First valid URL among the two candidate keys under 'source'
        source = traverse_obj(
            stream_status,
            ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}),
            get_all=False)
        if source:
            # XXX: Some Spaces need ffmpeg as downloader
            formats = self._extract_m3u8_formats(
                source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                headers=headers, fatal=False)
        for fmt in formats:
            fmt['vcodec'] = 'none'
            fmt['acodec'] = 'aac'
            if not is_live:
                fmt['container'] = 'm4a_dash'

    speaker_names = traverse_obj(
        space_data, ('participants', 'speakers', ..., 'display_name'))
    participants = ', '.join(speaker_names) or 'nobody yet'

    if live_status == 'post_live' and not formats:
        self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

    creator_path = ('creator_results', 'result', 'legacy')
    return {
        'id': space_id,
        'title': metadata.get('title'),
        'description': f'Twitter Space participated by {participants}',
        'uploader': traverse_obj(metadata, (*creator_path, 'name')),
        'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
        'live_status': live_status,
        # scale=1000 divides the raw API values by 1000
        'release_timestamp': try_call(
            lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
        'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
        'formats': formats,
        'http_headers': headers,
    }
1814
1815
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves ``https://t.co/<id>`` short links, and the ``tco:<id>``
    # pseudo-URL form, to the URL they redirect to.
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'
    # Prefix of Twitter's unsafe-link warning page; whatever follows it in
    # the final URL is treated as the real destination.
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever URL it lands on.

        Returns the ``url_result`` for the final URL, with the unsafe-link
        warning prefix removed when present.
        """
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # ``tco:<id>`` form: rebuild the real short URL from the bare ID
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the 'curl' User-Agent presumably makes t.co answer
        # with a plain HTTP redirect -- confirm before changing
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Strip only the leading prefix; str.replace would also remove
            # any later occurrence inside the destination URL itself
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)