]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[ie/afreecatv:live] Fix extractor (#9348)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import random
3 import re
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import functools # isort: split
8 from ..compat import (
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..networking.exceptions import HTTPError
14 from ..utils import (
15 ExtractorError,
16 dict_get,
17 filter_dict,
18 float_or_none,
19 format_field,
20 int_or_none,
21 make_archive_id,
22 remove_end,
23 str_or_none,
24 strip_or_none,
25 traverse_obj,
26 try_call,
27 try_get,
28 unified_timestamp,
29 update_url_query,
30 url_or_none,
31 xpath_text,
32 )
33
34
class TwitterBaseIE(InfoExtractor):
    # Common base for the Twitter extractors: holds the API endpoints and bearer
    # tokens, the interactive login flow, and the helpers used to query the
    # legacy (1.1) and GraphQL APIs and to turn media variants into formats.
    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Token returned by the most recent onboarding/task.json response; it is
    # echoed back in the next login request (see _call_login_api).
    _flow_token = None

    # Request body sent when starting the "login" flow. Serialised once, in
    # compact form, at class-definition time.
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        Returns a ``(formats, subtitles)`` pair. A variant without a URL
        yields ``([], {})``; an HLS URL is expanded through the m3u8 helper;
        any other URL becomes one direct HTTP format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}

        if '.m3u8' not in variant_url:
            bitrate = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            http_format = {
                'url': variant_url,
                'format_id': f'http-{bitrate:d}' if bitrate else 'http',
                'tbr': bitrate,
            }
            self._search_dimensions_in_video_url(http_format, variant_url)
            return [http_format], {}

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            variant_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False)
        # Audio-only renditions that came back without a bitrate: recover it
        # from the format id when that id embeds one.
        audio_without_tbr = traverse_obj(
            formats, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None)
        for audio_format in audio_without_tbr:
            mobj = re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', audio_format['format_id'])
            if mobj:
                audio_format['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
        return formats, subtitles

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and collect formats from its variants.

        Returns a ``(formats, subtitles)`` pair; ``([], {})`` when ``vmap_url``
        is not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}

        vmap_data = self._download_xml(vmap_url, video_id)
        formats, subtitles, seen_urls = [], {}, []

        def collect(variant):
            # Fold one variant's formats/subtitles into the running totals.
            nonlocal subtitles
            fmts, subs = self._extract_variant_formats(variant, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        for variant_node in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded in the document; decode it
            # in place so the variant mapping carries the usable URL.
            decoded_url = compat_urllib_parse_unquote(variant_node.attrib['url'])
            variant_node.attrib['url'] = decoded_url
            seen_urls.append(decoded_url)
            collect(variant_node.attrib)

        # The MediaFile element is only added when none of the variants
        # already covered the same URL.
        media_file_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if media_file_url not in seen_urls:
            collect({'url': media_file_url})
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` from a ``/<W>x<H>/`` URL segment, if present."""
        mobj = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if not mobj:
            return
        a_format.update(
            width=int(mobj.group('width')),
            height=int(mobj.group('height')))

    @property
    def is_logged_in(self):
        """True when an ``auth_token`` cookie exists for the API host."""
        auth_cookie = self._get_cookies(self._API_BASE).get('auth_token')
        return bool(auth_cookie)

    @functools.cached_property
    def _selected_api(self):
        """First value of the ``api`` extractor argument (default ``'graphql'``)."""
        api_choices = self._configuration_arg('api', ['graphql'], ie_key='Twitter')
        return api_choices[0]

    def _fetch_guest_token(self, display_id):
        """Activate and return a guest token; raise ExtractorError if none is returned."""
        use_legacy = display_id and self._selected_api == 'legacy'
        response = self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token',
            data=b'', headers=self._set_base_headers(legacy=use_legacy))
        guest_token = traverse_obj(response, ('guest_token', {str}))
        if guest_token:
            return guest_token
        raise ExtractorError('Could not retrieve guest token')

    def _set_base_headers(self, legacy=False):
        """Return the Authorization header plus the CSRF header when a ``ct0`` cookie exists.

        The legacy bearer token is used only when ``legacy`` is requested and
        the session is not logged in.
        """
        if legacy and not self.is_logged_in:
            bearer_token = self._LEGACY_AUTH
        else:
            bearer_token = self._AUTH
        csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': csrf_token,
        })

    def _call_login_api(self, note, headers, query={}, data=None):
        """Send one login-flow request and return the id of the next subtask.

        Stores the response's flow token on the instance. Raises
        ExtractorError when the API reports an error, the status is not
        ``success``, or no subtask id is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query, data=data, expected_status=400)

        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        if traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']
        return subtask

    def _perform_login(self, username, password):
        """Walk the login flow, answering each subtask until logged in.

        Does nothing when already logged in. Raises when the API asks for a
        captcha, rejects the attempt, reports success without the auth cookie,
        or returns a subtask id this method does not know.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None)
        if not guest_token:
            guest_token = self._fetch_guest_token(None)

        headers = self._set_base_headers()
        headers.update({
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        })

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, so it always carries the
            # token from the latest response.
            payload = {
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs,
            }
            return json.dumps(payload, separators=(',', ':')).encode()

        def text_input(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link',
                },
            }

        def submit(note, subtask_input):
            return self._call_login_api(note, headers, data=build_login_json(subtask_input))

        subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            # Terminal subtasks: nothing further can be submitted.
            if subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif subtask == 'LoginSuccessSubtask':
                raise ExtractorError('Twitter API did not grant auth token cookie')

            # Subtasks answered from data we already have.
            elif subtask == 'LoginJsInstrumentationSubtask':
                subtask = submit('Submitting JS instrumentation response', {
                    'subtask_id': subtask,
                    'js_instrumentation': {
                        'response': '{}',
                        'link': 'next_link',
                    },
                })

            elif subtask == 'LoginEnterUserIdentifierSSO':
                subtask = submit('Submitting username', {
                    'subtask_id': subtask,
                    'settings_list': {
                        'setting_responses': [{
                            'key': 'user_identifier',
                            'response_data': {
                                'text_data': {
                                    'result': username,
                                },
                            },
                        }],
                        'link': 'next_link',
                    },
                })

            elif subtask == 'LoginEnterPassword':
                subtask = submit('Submitting password', {
                    'subtask_id': subtask,
                    'enter_password': {
                        'password': password,
                        'link': 'next_link',
                    },
                })

            elif subtask == 'AccountDuplicationCheck':
                subtask = submit('Submitting account duplication check', {
                    'subtask_id': subtask,
                    'check_logged_in_account': {
                        'link': 'AccountDuplicationCheck_false',
                    },
                })

            # Subtasks that need extra input obtained via _get_tfa_info.
            elif subtask == 'LoginEnterAlternateIdentifierSubtask':
                subtask = submit('Submitting alternate identifier', text_input(
                    subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username')))

            elif subtask == 'LoginTwoFactorAuthChallenge':
                subtask = submit('Submitting 2FA token', text_input(
                    subtask, self._get_tfa_info('two-factor authentication token')))

            elif subtask == 'LoginAcid':
                subtask = submit('Submitting confirmation code', text_input(
                    subtask, self._get_tfa_info('confirmation code sent to your email or phone')))

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query={}, graphql=False):
        """Query the legacy or GraphQL API and return the decoded JSON.

        Raises through ``raise_login_required`` when the reported errors
        mention "not authorized", and ExtractorError for any other reported
        error.
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })
        else:
            headers.update({
                'x-guest-token': self._fetch_guest_token(video_id),
            })

        if graphql:
            api_base, api_label = self._GRAPHQL_API_BASE, 'GraphQL'
            allowed_status = {400, 401, 403, 404}
        else:
            api_base, api_label = self._API_BASE, 'legacy API'
            allowed_status = {403}

        result = self._download_json(
            api_base + path, video_id, headers=headers, query=query,
            expected_status=allowed_status, note=f'Downloading {api_label} JSON')

        if not result.get('errors'):
            return result

        # Duplicate messages are collapsed before being joined for display.
        messages = traverse_obj(result, ('errors', ..., 'message', {str}))
        errors = ', '.join(set(messages))
        if errors and 'not authorized' in errors:
            self.raise_login_required(remove_end(errors, '.'))
        raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

    def _build_graphql_query(self, media_id):
        """Hook for subclasses; the base implementation always raises NotImplementedError."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the ``data`` member of the response."""
        query_parts = self._build_graphql_query(media_id)
        # Each top-level value is sent as its own compact-JSON query parameter.
        query = {}
        for key, value in query_parts.items():
            query[key] = json.dumps(value, separators=(',', ':'))
        response = self._call_api(endpoint, media_id, query=query, graphql=True)
        return traverse_obj(response, 'data')
337
338
class TwitterCardIE(InfoExtractor):
    # Matches card / embedded-video URLs and hands the numeric id over to
    # TwitterIE through the equivalent /statuses/<id> URL.
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect the matched card URL to TwitterIE via its /statuses/<id> URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
463
464
465 class TwitterIE(TwitterBaseIE):
466 IE_NAME = 'twitter'
467 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
468
469 _TESTS = [{
470 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
471 'info_dict': {
472 'id': '643211870443208704',
473 'display_id': '643211948184596480',
474 'ext': 'mp4',
475 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
476 'thumbnail': r're:^https?://.*\.jpg',
477 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
478 'channel_id': '549749560',
479 'uploader': 'FREE THE NIPPLE',
480 'uploader_id': 'freethenipple',
481 'duration': 12.922,
482 'timestamp': 1442188653,
483 'upload_date': '20150913',
484 'uploader_url': 'https://twitter.com/freethenipple',
485 'comment_count': int,
486 'repost_count': int,
487 'like_count': int,
488 'tags': [],
489 'age_limit': 18,
490 '_old_archive_ids': ['twitter 643211948184596480'],
491 },
492 'skip': 'Requires authentication',
493 }, {
494 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
495 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
496 'info_dict': {
497 'id': '657991469417025536',
498 'ext': 'mp4',
499 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
500 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
501 'thumbnail': r're:^https?://.*\.png',
502 'uploader': 'Gifs',
503 'uploader_id': 'giphz',
504 },
505 'expected_warnings': ['height', 'width'],
506 'skip': 'Account suspended',
507 }, {
508 'url': 'https://twitter.com/starwars/status/665052190608723968',
509 'info_dict': {
510 'id': '665052190608723968',
511 'display_id': '665052190608723968',
512 'ext': 'mp4',
513 'title': r're:Star Wars.*A new beginning is coming December 18.*',
514 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
515 'channel_id': '20106852',
516 'uploader_id': 'starwars',
517 'uploader': r're:Star Wars.*',
518 'timestamp': 1447395772,
519 'upload_date': '20151113',
520 'uploader_url': 'https://twitter.com/starwars',
521 'comment_count': int,
522 'repost_count': int,
523 'like_count': int,
524 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
525 'age_limit': 0,
526 '_old_archive_ids': ['twitter 665052190608723968'],
527 },
528 }, {
529 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
530 'info_dict': {
531 'id': '705235433198714880',
532 'ext': 'mp4',
533 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
534 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
535 'uploader_id': 'BTNBrentYarina',
536 'uploader': 'Brent Yarina',
537 'timestamp': 1456976204,
538 'upload_date': '20160303',
539 'uploader_url': 'https://twitter.com/BTNBrentYarina',
540 'comment_count': int,
541 'repost_count': int,
542 'like_count': int,
543 'tags': [],
544 'age_limit': 0,
545 },
546 'params': {
547 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
548 # Test case of TwitterCardIE
549 'skip_download': True,
550 },
551 'skip': 'Dead external link',
552 }, {
553 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
554 'info_dict': {
555 'id': '700207414000242688',
556 'display_id': '700207533655363584',
557 'ext': 'mp4',
558 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
559 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
560 'thumbnail': r're:^https?://.*\.jpg',
561 'channel_id': '1383165541',
562 'uploader': 'jaydin donte geer',
563 'uploader_id': 'jaydingeer',
564 'duration': 30.0,
565 'timestamp': 1455777459,
566 'upload_date': '20160218',
567 'uploader_url': 'https://twitter.com/jaydingeer',
568 'comment_count': int,
569 'repost_count': int,
570 'like_count': int,
571 'tags': ['Damndaniel'],
572 'age_limit': 0,
573 '_old_archive_ids': ['twitter 700207533655363584'],
574 },
575 }, {
576 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
577 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
578 'info_dict': {
579 'id': 'MIOxnrUteUd',
580 'ext': 'mp4',
581 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
582 'uploader': 'TAKUMA',
583 'uploader_id': '1004126642786242560',
584 'timestamp': 1402826626,
585 'upload_date': '20140615',
586 'thumbnail': r're:^https?://.*\.jpg',
587 'alt_title': 'Vine by TAKUMA',
588 'comment_count': int,
589 'repost_count': int,
590 'like_count': int,
591 'view_count': int,
592 },
593 'add_ie': ['Vine'],
594 }, {
595 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
596 'info_dict': {
597 'id': '717462543795523584',
598 'display_id': '719944021058060289',
599 'ext': 'mp4',
600 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
601 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
602 'channel_id': '701615052',
603 'uploader_id': 'CaptainAmerica',
604 'uploader': 'Captain America',
605 'duration': 3.17,
606 'timestamp': 1460483005,
607 'upload_date': '20160412',
608 'uploader_url': 'https://twitter.com/CaptainAmerica',
609 'thumbnail': r're:^https?://.*\.jpg',
610 'comment_count': int,
611 'repost_count': int,
612 'like_count': int,
613 'tags': [],
614 'age_limit': 0,
615 '_old_archive_ids': ['twitter 719944021058060289'],
616 },
617 }, {
618 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
619 'info_dict': {
620 'id': '1zqKVVlkqLaKB',
621 'ext': 'mp4',
622 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
623 'upload_date': '20160923',
624 'uploader_id': '1PmKqpJdOJQoY',
625 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
626 'timestamp': 1474613214,
627 'thumbnail': r're:^https?://.*\.jpg',
628 },
629 'add_ie': ['Periscope'],
630 'skip': 'Broadcast not found',
631 }, {
632 # has mp4 formats via mobile API
633 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
634 'info_dict': {
635 'id': '852077943283097602',
636 'ext': 'mp4',
637 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
638 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
639 'channel_id': '2526757026',
640 'uploader': 'عالم الأخبار',
641 'uploader_id': 'news_al3alm',
642 'duration': 277.4,
643 'timestamp': 1492000653,
644 'upload_date': '20170412',
645 'display_id': '852138619213144067',
646 'age_limit': 0,
647 'uploader_url': 'https://twitter.com/news_al3alm',
648 'thumbnail': r're:^https?://.*\.jpg',
649 'tags': [],
650 'repost_count': int,
651 'like_count': int,
652 'comment_count': int,
653 '_old_archive_ids': ['twitter 852138619213144067'],
654 },
655 }, {
656 'url': 'https://twitter.com/i/web/status/910031516746514432',
657 'info_dict': {
658 'id': '910030238373089285',
659 'display_id': '910031516746514432',
660 'ext': 'mp4',
661 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
662 'thumbnail': r're:^https?://.*\.jpg',
663 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
664 'channel_id': '2319432498',
665 'uploader': 'Préfet de Guadeloupe',
666 'uploader_id': 'Prefet971',
667 'duration': 47.48,
668 'timestamp': 1505803395,
669 'upload_date': '20170919',
670 'uploader_url': 'https://twitter.com/Prefet971',
671 'comment_count': int,
672 'repost_count': int,
673 'like_count': int,
674 'tags': ['Maria'],
675 'age_limit': 0,
676 '_old_archive_ids': ['twitter 910031516746514432'],
677 },
678 'params': {
679 'skip_download': True, # requires ffmpeg
680 },
681 }, {
682 # card via api.twitter.com/1.1/videos/tweet/config
683 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
684 'info_dict': {
685 'id': '1001551417340022785',
686 'display_id': '1001551623938805763',
687 'ext': 'mp4',
688 'title': 're:.*?Shep is on a roll today.*?',
689 'thumbnail': r're:^https?://.*\.jpg',
690 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
691 'channel_id': '255036353',
692 'uploader': 'Lis Power',
693 'uploader_id': 'LisPower1',
694 'duration': 111.278,
695 'timestamp': 1527623489,
696 'upload_date': '20180529',
697 'uploader_url': 'https://twitter.com/LisPower1',
698 'comment_count': int,
699 'repost_count': int,
700 'like_count': int,
701 'tags': [],
702 'age_limit': 0,
703 '_old_archive_ids': ['twitter 1001551623938805763'],
704 },
705 'params': {
706 'skip_download': True, # requires ffmpeg
707 },
708 }, {
709 'url': 'https://twitter.com/foobar/status/1087791357756956680',
710 'info_dict': {
711 'id': '1087791272830607360',
712 'display_id': '1087791357756956680',
713 'ext': 'mp4',
714 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
715 'thumbnail': r're:^https?://.*\.jpg',
716 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
717 'uploader': 'X',
718 'uploader_id': 'X',
719 'duration': 61.567,
720 'timestamp': 1548184644,
721 'upload_date': '20190122',
722 'uploader_url': 'https://twitter.com/X',
723 'comment_count': int,
724 'repost_count': int,
725 'like_count': int,
726 'view_count': int,
727 'tags': [],
728 'age_limit': 0,
729 },
730 'skip': 'This Tweet is unavailable',
731 }, {
732 # not available in Periscope
733 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
734 'info_dict': {
735 'id': '1vOGwqejwoWxB',
736 'ext': 'mp4',
737 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
738 'uploader': 'Vivi',
739 'uploader_id': '1eVjYOLGkGrQL',
740 'thumbnail': r're:^https?://.*\.jpg',
741 'tags': ['EduTECH2019'],
742 'view_count': int,
743 },
744 'add_ie': ['TwitterBroadcast'],
745 'skip': 'Broadcast no longer exists',
746 }, {
747 # unified card
748 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
749 'info_dict': {
750 'id': '1349774757969989634',
751 'display_id': '1349794411333394432',
752 'ext': 'mp4',
753 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
754 'thumbnail': r're:^https?://.*\.jpg',
755 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
756 'channel_id': '18552281',
757 'uploader': 'Brooklyn Nets',
758 'uploader_id': 'BrooklynNets',
759 'duration': 324.484,
760 'timestamp': 1610651040,
761 'upload_date': '20210114',
762 'uploader_url': 'https://twitter.com/BrooklynNets',
763 'comment_count': int,
764 'repost_count': int,
765 'like_count': int,
766 'tags': [],
767 'age_limit': 0,
768 '_old_archive_ids': ['twitter 1349794411333394432'],
769 },
770 'params': {
771 'skip_download': True,
772 },
773 }, {
774 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
775 'info_dict': {
776 'id': '1577855447914409984',
777 'display_id': '1577855540407197696',
778 'ext': 'mp4',
779 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
780 'description': 'md5:b9c3699335447391d11753ab21c70a74',
781 'upload_date': '20221006',
782 'channel_id': '143077138',
783 'uploader': 'Oshtru',
784 'uploader_id': 'oshtru',
785 'uploader_url': 'https://twitter.com/oshtru',
786 'thumbnail': r're:^https?://.*\.jpg',
787 'duration': 30.03,
788 'timestamp': 1665025050,
789 'comment_count': int,
790 'repost_count': int,
791 'like_count': int,
792 'tags': [],
793 'age_limit': 0,
794 '_old_archive_ids': ['twitter 1577855540407197696'],
795 },
796 'params': {'skip_download': True},
797 }, {
798 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
799 'info_dict': {
800 'id': '1577719286659006464',
801 'title': 'Ultima Reload - Test',
802 'description': 'Test https://t.co/Y3KEZD7Dad',
803 'channel_id': '168922496',
804 'uploader': 'Ultima Reload',
805 'uploader_id': 'UltimaShadowX',
806 'uploader_url': 'https://twitter.com/UltimaShadowX',
807 'upload_date': '20221005',
808 'timestamp': 1664992565,
809 'comment_count': int,
810 'repost_count': int,
811 'like_count': int,
812 'tags': [],
813 'age_limit': 0,
814 },
815 'playlist_count': 4,
816 'params': {'skip_download': True},
817 }, {
818 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
819 'info_dict': {
820 'id': '1575559336759263233',
821 'display_id': '1575560063510810624',
822 'ext': 'mp4',
823 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
824 'thumbnail': r're:^https?://.*\.jpg',
825 'description': 'md5:95aea692fda36a12081b9629b02daa92',
826 'channel_id': '1094109584',
827 'uploader': 'Max Olson',
828 'uploader_id': 'MesoMax919',
829 'uploader_url': 'https://twitter.com/MesoMax919',
830 'duration': 21.321,
831 'timestamp': 1664477766,
832 'upload_date': '20220929',
833 'comment_count': int,
834 'repost_count': int,
835 'like_count': int,
836 'tags': ['HurricaneIan'],
837 'age_limit': 0,
838 '_old_archive_ids': ['twitter 1575560063510810624'],
839 },
840 }, {
841 # Adult content, fails if not logged in
842 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
843 'info_dict': {
844 'id': '1575199163847000068',
845 'display_id': '1575199173472927762',
846 'ext': 'mp4',
847 'title': str,
848 'description': str,
849 'channel_id': '1217167793541480450',
850 'uploader': str,
851 'uploader_id': 'Rizdraws',
852 'uploader_url': 'https://twitter.com/Rizdraws',
853 'upload_date': '20220928',
854 'timestamp': 1664391723,
855 'thumbnail': r're:^https?://.+\.jpg',
856 'like_count': int,
857 'repost_count': int,
858 'comment_count': int,
859 'age_limit': 18,
860 'tags': [],
861 '_old_archive_ids': ['twitter 1575199173472927762'],
862 },
863 'params': {'skip_download': 'The media could not be played'},
864 'skip': 'Requires authentication',
865 }, {
866 # Playlist result only with graphql API
867 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
868 'playlist_mincount': 2,
869 'info_dict': {
870 'id': '1395079556562706435',
871 'title': str,
872 'tags': [],
873 'channel_id': '21539378',
874 'uploader': str,
875 'like_count': int,
876 'upload_date': '20210519',
877 'age_limit': 0,
878 'repost_count': int,
879 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
880 'uploader_id': 'Srirachachau',
881 'comment_count': int,
882 'uploader_url': 'https://twitter.com/Srirachachau',
883 'timestamp': 1621447860,
884 },
885 }, {
886 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
887 'playlist_mincount': 2,
888 'info_dict': {
889 'id': '1578353380363501568',
890 'title': str,
891 'channel_id': '2195866214',
892 'uploader_id': 'DavidToons_',
893 'repost_count': int,
894 'like_count': int,
895 'uploader': str,
896 'timestamp': 1665143744,
897 'uploader_url': 'https://twitter.com/DavidToons_',
898 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
899 'tags': [],
900 'comment_count': int,
901 'upload_date': '20221007',
902 'age_limit': 0,
903 },
904 }, {
905 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
906 'playlist_count': 2,
907 'info_dict': {
908 'id': '1578401165338976258',
909 'title': str,
910 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
911 'channel_id': '19338359',
912 'uploader': str,
913 'uploader_id': 'primevideouk',
914 'timestamp': 1665155137,
915 'upload_date': '20221007',
916 'age_limit': 0,
917 'uploader_url': 'https://twitter.com/primevideouk',
918 'comment_count': int,
919 'repost_count': int,
920 'like_count': int,
921 'tags': ['TheRingsOfPower'],
922 },
923 }, {
924 # Twitter Spaces
925 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
926 'info_dict': {
927 'id': '1lPJqmBeeNAJb',
928 'ext': 'm4a',
929 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
930 'uploader': r're:Monique Camarra.+?',
931 'uploader_id': 'MoniqueCamarra',
932 'live_status': 'was_live',
933 'release_timestamp': 1658417414,
934 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
935 'timestamp': 1658407771,
936 'release_date': '20220721',
937 'upload_date': '20220721',
938 },
939 'add_ie': ['TwitterSpaces'],
940 'params': {'skip_download': 'm3u8'},
941 'skip': 'Requires authentication',
942 }, {
943 # URL specifies video number but --yes-playlist
944 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
945 'playlist_mincount': 2,
946 'info_dict': {
947 'id': '1600649710662213632',
948 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
949 'timestamp': 1670459604.0,
950 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
951 'comment_count': int,
952 'uploader_id': 'CTVJLaidlaw',
953 'channel_id': '80082014',
954 'repost_count': int,
955 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
956 'upload_date': '20221208',
957 'age_limit': 0,
958 'uploader': 'Jocelyn Laidlaw',
959 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
960 'like_count': int,
961 },
962 }, {
963 # URL specifies video number and --no-playlist
964 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
965 'info_dict': {
966 'id': '1600649511827013632',
967 'ext': 'mp4',
968 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
969 'thumbnail': r're:^https?://.+\.jpg',
970 'timestamp': 1670459604.0,
971 'channel_id': '80082014',
972 'uploader_id': 'CTVJLaidlaw',
973 'uploader': 'Jocelyn Laidlaw',
974 'repost_count': int,
975 'comment_count': int,
976 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
977 'duration': 102.226,
978 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
979 'display_id': '1600649710662213632',
980 'like_count': int,
981 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
982 'upload_date': '20221208',
983 'age_limit': 0,
984 '_old_archive_ids': ['twitter 1600649710662213632'],
985 },
986 'params': {'noplaylist': True},
987 }, {
988 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
989 # note the id different between extraction and url
990 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
991 'info_dict': {
992 'id': '1621117577354424321',
993 'display_id': '1621117700482416640',
994 'ext': 'mp4',
995 'title': '뽀 - 아 최우제 이동속도 봐',
996 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
997 'duration': 24.598,
998 'channel_id': '1281839411068432384',
999 'uploader': '뽀',
1000 'uploader_id': 's2FAKER',
1001 'uploader_url': 'https://twitter.com/s2FAKER',
1002 'upload_date': '20230202',
1003 'timestamp': 1675339553.0,
1004 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1005 'age_limit': 18,
1006 'tags': [],
1007 'like_count': int,
1008 'repost_count': int,
1009 'comment_count': int,
1010 '_old_archive_ids': ['twitter 1621117700482416640'],
1011 },
1012 'skip': 'Requires authentication',
1013 }, {
1014 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1015 'info_dict': {
1016 'id': '1599108643743473680',
1017 'display_id': '1599108751385972737',
1018 'ext': 'mp4',
1019 'title': '\u06ea - \U0001F48B',
1020 'channel_id': '1347791436809441283',
1021 'uploader_url': 'https://twitter.com/hlo_again',
1022 'like_count': int,
1023 'uploader_id': 'hlo_again',
1024 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1025 'repost_count': int,
1026 'duration': 9.531,
1027 'comment_count': int,
1028 'upload_date': '20221203',
1029 'age_limit': 0,
1030 'timestamp': 1670092210.0,
1031 'tags': [],
1032 'uploader': '\u06ea',
1033 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1034 '_old_archive_ids': ['twitter 1599108751385972737'],
1035 },
1036 'params': {'noplaylist': True},
1037 }, {
1038 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1039 'info_dict': {
1040 'id': '1600009362759733248',
1041 'display_id': '1600009574919962625',
1042 'ext': 'mp4',
1043 'channel_id': '211814412',
1044 'uploader_url': 'https://twitter.com/MunTheShinobi',
1045 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1046 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1047 'age_limit': 0,
1048 'uploader': 'Mün',
1049 'repost_count': int,
1050 'upload_date': '20221206',
1051 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1052 'comment_count': int,
1053 'like_count': int,
1054 'tags': [],
1055 'uploader_id': 'MunTheShinobi',
1056 'duration': 139.987,
1057 'timestamp': 1670306984.0,
1058 '_old_archive_ids': ['twitter 1600009574919962625'],
1059 },
1060 }, {
1061 # retweeted_status (private)
1062 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1063 'info_dict': {
1064 'id': '1623274794488659969',
1065 'display_id': '1623739803874349067',
1066 'ext': 'mp4',
1067 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1068 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1069 'uploader': 'Johnny Bullets',
1070 'uploader_id': 'Johnnybull3ts',
1071 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1072 'age_limit': 0,
1073 'tags': [],
1074 'duration': 8.033,
1075 'timestamp': 1675853859.0,
1076 'upload_date': '20230208',
1077 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1078 'like_count': int,
1079 'repost_count': int,
1080 },
1081 'skip': 'Protected tweet',
1082 }, {
1083 # retweeted_status
1084 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1085 'info_dict': {
1086 'id': '1694928337846538240',
1087 'ext': 'mp4',
1088 'display_id': '1695424220702888009',
1089 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1090 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1091 'channel_id': '15212187',
1092 'uploader': 'Benny Johnson',
1093 'uploader_id': 'bennyjohnson',
1094 'uploader_url': 'https://twitter.com/bennyjohnson',
1095 'age_limit': 0,
1096 'tags': [],
1097 'duration': 45.001,
1098 'timestamp': 1692962814.0,
1099 'upload_date': '20230825',
1100 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1101 'like_count': int,
1102 'repost_count': int,
1103 'comment_count': int,
1104 '_old_archive_ids': ['twitter 1695424220702888009'],
1105 },
1106 }, {
1107 # retweeted_status w/ legacy API
1108 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1109 'info_dict': {
1110 'id': '1694928337846538240',
1111 'ext': 'mp4',
1112 'display_id': '1695424220702888009',
1113 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1114 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1115 'channel_id': '15212187',
1116 'uploader': 'Benny Johnson',
1117 'uploader_id': 'bennyjohnson',
1118 'uploader_url': 'https://twitter.com/bennyjohnson',
1119 'age_limit': 0,
1120 'tags': [],
1121 'duration': 45.001,
1122 'timestamp': 1692962814.0,
1123 'upload_date': '20230825',
1124 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1125 'like_count': int,
1126 'repost_count': int,
1127 '_old_archive_ids': ['twitter 1695424220702888009'],
1128 },
1129 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1130 }, {
1131 # Broadcast embedded in tweet
1132 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1133 'info_dict': {
1134 'id': '1rmxPMjLzAXKN',
1135 'ext': 'mp4',
1136 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1137 'uploader': 'Jessica Dobson',
1138 'uploader_id': 'JessicaDobsonWX',
1139 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1140 'timestamp': 1701566398,
1141 'upload_date': '20231203',
1142 'live_status': 'was_live',
1143 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1144 'concurrent_view_count': int,
1145 'view_count': int,
1146 },
1147 'add_ie': ['TwitterBroadcast'],
1148 }, {
1149 # Animated gif and quote tweet video
1150 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1151 'playlist_mincount': 2,
1152 'info_dict': {
1153 'id': '1696256659889565950',
1154 'title': 'BAKOON - https://t.co/zom968d0a0',
1155 'description': 'https://t.co/zom968d0a0',
1156 'tags': [],
1157 'channel_id': '1263540390',
1158 'uploader': 'BAKOON',
1159 'uploader_id': 'BAKKOOONN',
1160 'uploader_url': 'https://twitter.com/BAKKOOONN',
1161 'age_limit': 18,
1162 'timestamp': 1693254077.0,
1163 'upload_date': '20230828',
1164 'like_count': int,
1165 'comment_count': int,
1166 'repost_count': int,
1167 },
1168 'skip': 'Requires authentication',
1169 }, {
1170 # "stale tweet" with typename "TweetWithVisibilityResults"
1171 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1172 'md5': '511377ff8dfa7545307084dca4dce319',
1173 'info_dict': {
1174 'id': '1724883339285544960',
1175 'ext': 'mp4',
1176 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1177 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1178 'display_id': '1724884212803834154',
1179 'channel_id': '337808606',
1180 'uploader': 'Robert F. Kennedy Jr',
1181 'uploader_id': 'RobertKennedyJr',
1182 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1183 'upload_date': '20231115',
1184 'timestamp': 1700079417.0,
1185 'duration': 341.048,
1186 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1187 'tags': ['Kennedy24'],
1188 'repost_count': int,
1189 'like_count': int,
1190 'comment_count': int,
1191 'age_limit': 0,
1192 '_old_archive_ids': ['twitter 1724884212803834154'],
1193 },
1194 }, {
1195 # onion route
1196 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1197 'only_matching': True,
1198 }, {
1199 # Twitch Clip Embed
1200 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1201 'only_matching': True,
1202 }, {
1203 # promo_video_website card
1204 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1205 'only_matching': True,
1206 }, {
1207 # promo_video_convo card
1208 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1209 'only_matching': True,
1210 }, {
1211 # appplayer card
1212 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1213 'only_matching': True,
1214 }, {
1215 # video_direct_message card
1216 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1217 'only_matching': True,
1218 }, {
1219 # poll2choice_video card
1220 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1221 'only_matching': True,
1222 }, {
1223 # poll3choice_video card
1224 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1225 'only_matching': True,
1226 }, {
1227 # poll4choice_video card
1228 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1229 'only_matching': True,
1230 }]
1231
# Captures the run of digits that follows a "..._video/" path segment and is terminated by "/";
# applied to video variant URLs to recover a per-media id
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1233
@property
def _GRAPHQL_ENDPOINT(self):
    """GraphQL endpoint path to query: TweetDetail when logged in, TweetResultByRestId otherwise"""
    endpoint = '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
    if self.is_logged_in:
        endpoint = 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
    return endpoint
1239
def _graphql_to_legacy(self, data, twid):
    """
    Reshape a GraphQL tweet response so that it resembles a legacy API status dict

    The tweet's own 'legacy' dict is used as the base and is extended in place with
    'user', 'card', 'quoted_status' and 'retweeted_status' entries taken from the
    surrounding GraphQL result.

    Raises ExtractorError when the result is a tombstone or is reported as unavailable.
    """
    if self.is_logged_in:
        # TweetDetail response: pick the conversation entry belonging to the requested tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        # TweetResultByRestId response
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    extra_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweeted_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweeted_user or {}

    # Turn the card's list of {key, value} items into a plain key -> value mapping
    card_bindings = {}
    for item in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[item.get('key')] = item.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
1287
def _build_graphql_query(self, media_id):
    """
    Build the GraphQL query ('variables', 'features' and, for guests, 'fieldToggles')
    for the given tweet id; the shape depends on whether the session is logged in
    """
    if not self.is_logged_in:
        guest_variables = {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        }
        guest_features = {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        }
        return {
            'variables': guest_variables,
            'features': guest_features,
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    user_variables = {
        'focalTweetId': media_id,
        'includePromotedContent': True,
        'with_rux_injections': False,
        'withBirdwatchNotes': True,
        'withCommunity': True,
        'withDownvotePerspective': False,
        'withQuickPromoteEligibilityTweetFields': True,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withSuperFollowsTweetFields': True,
        'withSuperFollowsUserFields': True,
        'withV2Timeline': True,
        'withVoice': True,
    }
    user_features = {
        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_graphql_timeline_navigation_enabled': False,
        'responsive_web_text_conversations_enabled': False,
        'responsive_web_uc_gql_enabled': True,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'tweetypie_unmention_optimization_enabled': True,
        'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
        'verified_phone_label_enabled': False,
        'vibe_api_enabled': True,
    }
    return {
        'variables': user_variables,
        'features': user_features,
    }
1352
def _call_syndication_api(self, twid):
    """
    Fetch a tweet from the syndication endpoint and attach an 'extended_entities'
    media list so the result can be consumed like a legacy/graphql status

    Raises ExtractorError if the endpoint returns an empty JSON response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
    collected = []
    for item in media_details:
        # Use the id embedded in the first matching variant URL, else fall back to the tweet id
        variant_media_id = traverse_obj(item, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
        item['id_str'] = variant_media_id or twid
        collected.append(item)
    status['extended_entities'] = {'media': collected}

    return status
1374
def _extract_status(self, twid):
    """
    Retrieve the status dict for a tweet through the selected API

    GraphQL is used when logged in or when selected; 'legacy' uses the statuses/show
    endpoint. An HTTP 429 from either falls back to the syndication endpoint, which is
    also queried whenever 'syndication' is the selected API. If the status wraps a
    retweet, the retweeted status is returned instead.

    Raises ExtractorError for an invalid API selection.
    """
    api = self._selected_api
    if api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif api == 'legacy':
            legacy_query = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
    except ExtractorError as e:
        rate_limited = isinstance(e.cause, HTTPError) and e.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if api == 'syndication':
        status = self._call_syndication_api(twid)

    # Prefer the retweeted status when present, otherwise the status itself
    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
1400
def _real_extract(self, url):
    """
    Extract the video(s) attached to a tweet

    The tweet id and the optional media index are taken from the URL. Metadata common
    to all entries is collected into `info`; media entries come from the tweet's (and
    its quoted tweet's) non-photo media and from its card. Depending on the playlist
    decision this returns a single video, a playlist, a URL result pointing at the
    tweet's first expanded URL, or just the metadata when no video is found.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    # First available of 'full_text'/'text', with newlines flattened to spaces
    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the id/formats/subtitles/thumbnails/duration fields for one media dict"""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                # Each thumbnail is the media URL with a `name` query parameter selecting the size
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries (URL results or format dicts) for the tweet's card; yields nothing if there is no card"""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # A binding is looked up under '<type>_value' where <type> is its lowercased 'type' field
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        # Only the part after the last ':' of the card name is used for dispatch
        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            # The binding holds a JSON document whose media entities are handled like regular media
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                # Skip missing images and placeholder images
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # All non-photo media of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The URL index is 1-based and is applied to the unfiltered media list (photos included)
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    # Per-entry data overrides the shared metadata (notably 'id'); the tweet id is kept as display_id
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media found: fall back to the first expanded URL of the tweet, unless it is this same URL
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    # Only the first entry carries the tweet-id-based archive id
    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    # Several entries: number the titles and return them as a playlist
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1584
1585
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for amp.twimg.com video pages, whose media is described by a VMAP URL in the page's meta tags"""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """
        Download the page, read the VMAP URL and image/player dimensions from its
        'twitter:*' meta tags and return the resulting info dict
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Width/height of 'twitter:<target>:*' meta tags; either may be None when absent
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # The format list can be empty; indexing it unconditionally would raise an
        # IndexError instead of handing an empty format list back to the caller
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1641
1642
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> pages"""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """
        Look up the broadcast through the API, fill in title/uploader details and,
        unless the broadcast is still upcoming, resolve its m3u8 stream into formats
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast entry take precedence over the parsed defaults
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The format id is the playlist URL's 'type' query parameter, if it has one
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = url_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1718
1719
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Twitter Spaces (``/i/spaces/<13-character id>`` URLs)."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased `state` of a Space's metadata to the `live_status` value reported
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` query payload for the Space identified by space_id.

        NOTE(review): presumably picked up by the base class' GraphQL helper
        when `_call_graphql_api` is invoked - confirm against TwitterBaseIE.
        """
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Build the info dict (metadata and audio formats) for the Space at url.

        Login is demanded up front, and an empty `audioSpace` result is
        reported as an expected "not found" ExtractorError.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        space = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space:
            raise ExtractorError('Twitter Space not found', expected=True)

        meta = space['metadata']
        # An unknown or missing state leaves live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[meta['state'].lower()])
        is_live = live_status == 'is_live'
        http_headers = {'Referer': 'https://twitter.com/'}

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or meta.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif meta.get('media_key'):
            media_key = meta['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL wins, preferring `noRedirectPlaybackUrl` over `location`
            playlist_url = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}),
                get_all=False)
            if playlist_url:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    playlist_url, media_key, 'm4a', entry_protocol='m3u8',
                    live=is_live, headers=http_headers, fatal=False)

            # Every format is tagged as audio-only AAC; the container is only set for non-live Spaces
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for audio_format in formats:
                audio_format.update(audio_fields)

        speaker_names = traverse_obj(
            space, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': meta.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(meta, (*creator_path, 'name')),
            'uploader_id': traverse_obj(meta, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # Both timestamps are divided by 1000 via int_or_none's `scale`
            'release_timestamp': try_call(
                lambda: int_or_none(meta['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(meta.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': http_headers,
        }
1858
1859
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve `t.co` short links (or `tco:<id>` pseudo-URLs) and hand off the target URL."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'
    # Prefix of the "unsafe link" interstitial that a resolved URL may start with;
    # the real destination is whatever follows it
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Follow the short link and return a url_result for the final destination.

        `tco:<id>` inputs are first expanded to `https://t.co/<id>`. If the
        resolved URL is the unsafe-link warning page, only the leading warning
        prefix is removed so the wrapped destination is returned instead.
        """
        mobj = self._match_valid_url(url)
        eid, link_id = mobj.group('eid', 'id')
        if eid:
            link_id = eid
            url = self._BASE_URL + link_id
        # NOTE(review): the curl User-Agent presumably makes t.co answer with a
        # plain HTTP redirect - confirm
        new_url = self._request_webpage(url, link_id, headers={'User-Agent': 'curl'}).url
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Slice instead of str.replace(): replace() would also delete any
            # later occurrence of the prefix inside the destination URL
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)