]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[ie/PrankCastPost] Add extractor (#8933)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
a006ce2b 2import random
23e7cba8
S
3import re
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
a006ce2b 7from ..compat import functools # isort: split
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
116c2684 13from ..networking.exceptions import HTTPError
23e7cba8 14from ..utils import (
2edfd745 15 ExtractorError,
13b2ae29 16 dict_get,
92315c03 17 filter_dict,
23e7cba8 18 float_or_none,
13b2ae29 19 format_field,
cf5881fc 20 int_or_none,
13b2ae29 21 make_archive_id,
147e62fc 22 remove_end,
13b2ae29
SS
23 str_or_none,
24 strip_or_none,
f1150b9e 25 traverse_obj,
7a26ce26 26 try_call,
2edfd745 27 try_get,
18ca61c5
RA
28 unified_timestamp,
29 update_url_query,
41d1cca3 30 url_or_none,
2edfd745 31 xpath_text,
23e7cba8
S
32)
33
34
class TwitterBaseIE(InfoExtractor):
    # Shared plumbing for the Twitter extractors in this module: API endpoints,
    # bearer tokens, the login flow, guest-token handling and format helpers.
    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header (see _set_base_headers)
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api after every login step and echoed back in the next one
    _flow_token = None

    # Request body for the first login call (the one made with flow_name=login)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Turn one media variant mapping into a ``(formats, subtitles)`` pair.

        A variant without a URL yields ``([], {})``.  URLs containing ``.m3u8``
        are handed to the HLS helper (non-fatal); anything else becomes a
        single direct HTTP format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # The variant may spell the key either way; a falsy result (missing or 0) becomes None.
            # NOTE(review): 1000 is int_or_none's second positional argument, presumably a scale - confirm
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            # Fills in width/height in place when the URL has a /<W>x<H>/ path segment
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and return ``(formats, subtitles)`` from it.

        Returns ``([], {})`` when ``vmap_url`` does not pass ``url_or_none``.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is unquoted in place so that the helper below sees the decoded URL
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also pick up the MediaFile URL unless one of the variants already covered it
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` from a ``/<W>x<H>/`` segment of ``video_url``.

        ``a_format`` is mutated in place and left untouched when nothing matches.
        """
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether the cookies for ``_API_BASE`` contain a truthy ``auth_token``."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        """First value of the ``api`` extractor argument (``ie_key='Twitter'``), default ``'graphql'``."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from ``guest/activate.json`` and return it.

        Raises ExtractorError when the response carries no string ``guest_token``.
        """
        # The legacy bearer token is only requested when a display_id is given and
        # the legacy API was selected (the login flow passes display_id=None)
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Build the headers shared by all API calls.

        ``_LEGACY_AUTH`` is used only when ``legacy`` is truthy and the session
        is not logged in; otherwise ``_AUTH``.  ``x-csrf-token`` is taken from
        the ``ct0`` cookie; ``filter_dict`` is applied to the result,
        presumably dropping the header when the cookie is absent - confirm.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow and return the next subtask ID.

        Stores the response's ``flow_token`` in ``self._flow_token`` so that the
        next step can send it back.  Raises ExtractorError when the API reports
        an error, when the status is not ``success`` or when no subtask ID is
        returned.
        """
        # query defaults to None rather than a shared mutable {}; normalised here
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Walk through the login subtasks until the ``auth_token`` cookie is set.

        Does nothing when already logged in.  Each subtask ID returned by the
        API selects the payload for the next request; an unknown ID, a captcha
        or a rejected attempt aborts the login with an error.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page, otherwise request a fresh one
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token of the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by all subtasks that submit free text
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # The loop only gets here while is_logged_in is still False
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query ``path`` on the GraphQL or the 1.1 API and return the decoded JSON.

        Logged-in sessions send the session headers, everybody else a freshly
        fetched guest token.  Raises a login-required error when the API says
        the request is not authorized, and ExtractorError for any other
        reported error.
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        # These statuses are passed as expected_status so that the error check below sees the response body
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        # query defaults to None rather than a shared mutable {}; normalised here
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys de-duplicates while keeping the API's order, so the
            # message is the same on every run (a set would not guarantee that)
            errors = ', '.join(dict.fromkeys(traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the mapping of GraphQL query parameters; subclasses must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL ``endpoint`` and return the ``data`` member of the response."""
        data = self._build_graphql_query(media_id)
        # Every value is sent as a compact JSON string in the query string
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
333
334
class TwitterCardIE(InfoExtractor):
    # Matches card / embedded-video URLs and hands the status ID over to TwitterIE
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'}
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        # No extraction happens here: the numeric ID from the URL is rewritten
        # into a /statuses/ URL that is delegated to TwitterIE
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 459
03879ff0 460
18ca61c5 461class TwitterIE(TwitterBaseIE):
014e8803 462 IE_NAME = 'twitter'
b6795fd3 463 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 464
cf5881fc 465 _TESTS = [{
48aae2d2 466 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 467 'info_dict': {
13b2ae29
SS
468 'id': '643211870443208704',
469 'display_id': '643211948184596480',
f57f84f6 470 'ext': 'mp4',
575036b4 471 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 472 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 473 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
474 'uploader': 'FREE THE NIPPLE',
475 'uploader_id': 'freethenipple',
3b65a6fb 476 'duration': 12.922,
18ca61c5
RA
477 'timestamp': 1442188653,
478 'upload_date': '20150913',
13b2ae29 479 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 480 'comment_count': int,
481 'repost_count': int,
13b2ae29
SS
482 'like_count': int,
483 'tags': [],
484 'age_limit': 18,
1c54a98e 485 '_old_archive_ids': ['twitter 643211948184596480'],
f57f84f6 486 },
cf5881fc
YCH
487 }, {
488 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
489 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
490 'info_dict': {
491 'id': '657991469417025536',
492 'ext': 'mp4',
493 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
494 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 495 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
496 'uploader': 'Gifs',
497 'uploader_id': 'giphz',
498 },
7efc1c2b 499 'expected_warnings': ['height', 'width'],
fc0a45fa 500 'skip': 'Account suspended',
b703ebee
JMF
501 }, {
502 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
503 'info_dict': {
504 'id': '665052190608723968',
13b2ae29 505 'display_id': '665052190608723968',
b703ebee 506 'ext': 'mp4',
b6795fd3 507 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 508 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 509 'uploader_id': 'starwars',
7a26ce26 510 'uploader': r're:Star Wars.*',
18ca61c5
RA
511 'timestamp': 1447395772,
512 'upload_date': '20151113',
13b2ae29 513 'uploader_url': 'https://twitter.com/starwars',
b03fa783 514 'comment_count': int,
515 'repost_count': int,
13b2ae29
SS
516 'like_count': int,
517 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
518 'age_limit': 0,
1c54a98e 519 '_old_archive_ids': ['twitter 665052190608723968'],
b703ebee 520 },
0ae937a7
YCH
521 }, {
522 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
523 'info_dict': {
524 'id': '705235433198714880',
525 'ext': 'mp4',
18ca61c5
RA
526 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
527 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
528 'uploader_id': 'BTNBrentYarina',
529 'uploader': 'Brent Yarina',
18ca61c5
RA
530 'timestamp': 1456976204,
531 'upload_date': '20160303',
13b2ae29
SS
532 'uploader_url': 'https://twitter.com/BTNBrentYarina',
533 'comment_count': int,
534 'repost_count': int,
535 'like_count': int,
536 'tags': [],
537 'age_limit': 0,
0ae937a7
YCH
538 },
539 'params': {
540 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
541 # Test case of TwitterCardIE
542 'skip_download': True,
543 },
352e7d98 544 'skip': 'Dead external link',
03879ff0
YCH
545 }, {
546 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 547 'info_dict': {
13b2ae29
SS
548 'id': '700207414000242688',
549 'display_id': '700207533655363584',
03879ff0 550 'ext': 'mp4',
13b2ae29 551 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 552 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 553 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
554 'uploader': 'jaydin donte geer',
555 'uploader_id': 'jaydingeer',
3b65a6fb 556 'duration': 30.0,
18ca61c5
RA
557 'timestamp': 1455777459,
558 'upload_date': '20160218',
13b2ae29 559 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 560 'comment_count': int,
561 'repost_count': int,
13b2ae29
SS
562 'like_count': int,
563 'tags': ['Damndaniel'],
564 'age_limit': 0,
1c54a98e 565 '_old_archive_ids': ['twitter 700207533655363584'],
03879ff0 566 },
395fd4b0
YCH
567 }, {
568 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
569 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
570 'info_dict': {
571 'id': 'MIOxnrUteUd',
572 'ext': 'mp4',
18ca61c5
RA
573 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
574 'uploader': 'TAKUMA',
575 'uploader_id': '1004126642786242560',
3615bfe1 576 'timestamp': 1402826626,
395fd4b0 577 'upload_date': '20140615',
13b2ae29
SS
578 'thumbnail': r're:^https?://.*\.jpg',
579 'alt_title': 'Vine by TAKUMA',
580 'comment_count': int,
581 'repost_count': int,
582 'like_count': int,
583 'view_count': int,
395fd4b0
YCH
584 },
585 'add_ie': ['Vine'],
36b7d9db
YCH
586 }, {
587 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 588 'info_dict': {
13b2ae29
SS
589 'id': '717462543795523584',
590 'display_id': '719944021058060289',
36b7d9db
YCH
591 'ext': 'mp4',
592 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
593 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
594 'uploader_id': 'CaptainAmerica',
36b7d9db 595 'uploader': 'Captain America',
3b65a6fb 596 'duration': 3.17,
18ca61c5
RA
597 'timestamp': 1460483005,
598 'upload_date': '20160412',
13b2ae29
SS
599 'uploader_url': 'https://twitter.com/CaptainAmerica',
600 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 601 'comment_count': int,
602 'repost_count': int,
13b2ae29
SS
603 'like_count': int,
604 'tags': [],
605 'age_limit': 0,
1c54a98e 606 '_old_archive_ids': ['twitter 719944021058060289'],
36b7d9db 607 },
f0bc5a86
YCH
608 }, {
609 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
610 'info_dict': {
611 'id': '1zqKVVlkqLaKB',
612 'ext': 'mp4',
18ca61c5 613 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 614 'upload_date': '20160923',
18ca61c5
RA
615 'uploader_id': '1PmKqpJdOJQoY',
616 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 617 'timestamp': 1474613214,
13b2ae29 618 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
619 },
620 'add_ie': ['Periscope'],
1c54a98e 621 'skip': 'Broadcast not found',
2edfd745
YCH
622 }, {
623 # has mp4 formats via mobile API
624 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
625 'info_dict': {
6014355c 626 'id': '852077943283097602',
2edfd745
YCH
627 'ext': 'mp4',
628 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 629 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
630 'uploader': 'عالم الأخبار',
631 'uploader_id': 'news_al3alm',
3b65a6fb 632 'duration': 277.4,
18ca61c5
RA
633 'timestamp': 1492000653,
634 'upload_date': '20170412',
6014355c 635 'display_id': '852138619213144067',
636 'age_limit': 0,
637 'uploader_url': 'https://twitter.com/news_al3alm',
638 'thumbnail': r're:^https?://.*\.jpg',
639 'tags': [],
640 'repost_count': int,
6014355c 641 'like_count': int,
642 'comment_count': int,
1c54a98e 643 '_old_archive_ids': ['twitter 852138619213144067'],
2edfd745 644 },
5c1452e8
GF
645 }, {
646 'url': 'https://twitter.com/i/web/status/910031516746514432',
647 'info_dict': {
13b2ae29
SS
648 'id': '910030238373089285',
649 'display_id': '910031516746514432',
5c1452e8
GF
650 'ext': 'mp4',
651 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
652 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 653 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
654 'uploader': 'Préfet de Guadeloupe',
655 'uploader_id': 'Prefet971',
656 'duration': 47.48,
18ca61c5
RA
657 'timestamp': 1505803395,
658 'upload_date': '20170919',
13b2ae29 659 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 660 'comment_count': int,
661 'repost_count': int,
13b2ae29
SS
662 'like_count': int,
663 'tags': ['Maria'],
664 'age_limit': 0,
1c54a98e 665 '_old_archive_ids': ['twitter 910031516746514432'],
5c1452e8
GF
666 },
667 'params': {
668 'skip_download': True, # requires ffmpeg
669 },
2593725a
S
670 }, {
671 # card via api.twitter.com/1.1/videos/tweet/config
672 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
673 'info_dict': {
13b2ae29
SS
674 'id': '1001551417340022785',
675 'display_id': '1001551623938805763',
2593725a
S
676 'ext': 'mp4',
677 'title': 're:.*?Shep is on a roll today.*?',
678 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 679 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
680 'uploader': 'Lis Power',
681 'uploader_id': 'LisPower1',
682 'duration': 111.278,
18ca61c5
RA
683 'timestamp': 1527623489,
684 'upload_date': '20180529',
13b2ae29 685 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 686 'comment_count': int,
687 'repost_count': int,
13b2ae29
SS
688 'like_count': int,
689 'tags': [],
690 'age_limit': 0,
1c54a98e 691 '_old_archive_ids': ['twitter 1001551623938805763'],
2593725a
S
692 },
693 'params': {
694 'skip_download': True, # requires ffmpeg
695 },
b7ef93f0
S
696 }, {
697 'url': 'https://twitter.com/foobar/status/1087791357756956680',
698 'info_dict': {
13b2ae29
SS
699 'id': '1087791272830607360',
700 'display_id': '1087791357756956680',
b7ef93f0 701 'ext': 'mp4',
6014355c 702 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 703 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 704 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 705 'uploader': 'X',
706 'uploader_id': 'X',
b7ef93f0 707 'duration': 61.567,
18ca61c5
RA
708 'timestamp': 1548184644,
709 'upload_date': '20190122',
6014355c 710 'uploader_url': 'https://twitter.com/X',
b03fa783 711 'comment_count': int,
712 'repost_count': int,
13b2ae29 713 'like_count': int,
b03fa783 714 'view_count': int,
13b2ae29
SS
715 'tags': [],
716 'age_limit': 0,
18ca61c5 717 },
a006ce2b 718 'skip': 'This Tweet is unavailable',
18ca61c5
RA
719 }, {
720 # not available in Periscope
721 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
722 'info_dict': {
723 'id': '1vOGwqejwoWxB',
724 'ext': 'mp4',
725 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
726 'uploader': 'Vivi',
727 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
728 'thumbnail': r're:^https?://.*\.jpg',
729 'tags': ['EduTECH2019'],
730 'view_count': int,
b7ef93f0 731 },
18ca61c5 732 'add_ie': ['TwitterBroadcast'],
a006ce2b 733 'skip': 'Broadcast no longer exists',
30a074c2 734 }, {
735 # unified card
736 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
737 'info_dict': {
13b2ae29
SS
738 'id': '1349774757969989634',
739 'display_id': '1349794411333394432',
30a074c2 740 'ext': 'mp4',
741 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
742 'thumbnail': r're:^https?://.*\.jpg',
743 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
744 'uploader': 'Brooklyn Nets',
745 'uploader_id': 'BrooklynNets',
746 'duration': 324.484,
747 'timestamp': 1610651040,
748 'upload_date': '20210114',
13b2ae29 749 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 750 'comment_count': int,
751 'repost_count': int,
13b2ae29
SS
752 'like_count': int,
753 'tags': [],
754 'age_limit': 0,
1c54a98e 755 '_old_archive_ids': ['twitter 1349794411333394432'],
30a074c2 756 },
757 'params': {
758 'skip_download': True,
759 },
13b2ae29
SS
760 }, {
761 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
762 'info_dict': {
763 'id': '1577855447914409984',
764 'display_id': '1577855540407197696',
765 'ext': 'mp4',
352e7d98 766 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
767 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 768 'upload_date': '20221006',
352e7d98 769 'uploader': 'oshtru',
13b2ae29
SS
770 'uploader_id': 'oshtru',
771 'uploader_url': 'https://twitter.com/oshtru',
772 'thumbnail': r're:^https?://.*\.jpg',
773 'duration': 30.03,
7a26ce26 774 'timestamp': 1665025050,
b03fa783 775 'comment_count': int,
776 'repost_count': int,
13b2ae29
SS
777 'like_count': int,
778 'tags': [],
779 'age_limit': 0,
1c54a98e 780 '_old_archive_ids': ['twitter 1577855540407197696'],
13b2ae29
SS
781 },
782 'params': {'skip_download': True},
783 }, {
784 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
785 'info_dict': {
786 'id': '1577719286659006464',
1c54a98e 787 'title': 'Ultima - Test',
13b2ae29 788 'description': 'Test https://t.co/Y3KEZD7Dad',
1c54a98e 789 'uploader': 'Ultima',
13b2ae29
SS
790 'uploader_id': 'UltimaShadowX',
791 'uploader_url': 'https://twitter.com/UltimaShadowX',
792 'upload_date': '20221005',
7a26ce26 793 'timestamp': 1664992565,
b03fa783 794 'comment_count': int,
795 'repost_count': int,
13b2ae29
SS
796 'like_count': int,
797 'tags': [],
798 'age_limit': 0,
799 },
800 'playlist_count': 4,
801 'params': {'skip_download': True},
7a26ce26
SS
802 }, {
803 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
804 'info_dict': {
805 'id': '1575559336759263233',
806 'display_id': '1575560063510810624',
807 'ext': 'mp4',
808 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
809 'thumbnail': r're:^https?://.*\.jpg',
810 'description': 'md5:95aea692fda36a12081b9629b02daa92',
811 'uploader': 'Max Olson',
812 'uploader_id': 'MesoMax919',
813 'uploader_url': 'https://twitter.com/MesoMax919',
814 'duration': 21.321,
815 'timestamp': 1664477766,
816 'upload_date': '20220929',
b03fa783 817 'comment_count': int,
818 'repost_count': int,
7a26ce26
SS
819 'like_count': int,
820 'tags': ['HurricaneIan'],
821 'age_limit': 0,
1c54a98e 822 '_old_archive_ids': ['twitter 1575560063510810624'],
7a26ce26
SS
823 },
824 }, {
a006ce2b 825 # Adult content, fails if not logged in
7a26ce26
SS
826 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
827 'info_dict': {
828 'id': '1575199163847000068',
829 'display_id': '1575199173472927762',
830 'ext': 'mp4',
831 'title': str,
832 'description': str,
833 'uploader': str,
834 'uploader_id': 'Rizdraws',
835 'uploader_url': 'https://twitter.com/Rizdraws',
836 'upload_date': '20220928',
837 'timestamp': 1664391723,
16bed382 838 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
839 'like_count': int,
840 'repost_count': int,
841 'comment_count': int,
842 'age_limit': 18,
843 'tags': []
844 },
a006ce2b 845 'params': {'skip_download': 'The media could not be played'},
147e62fc 846 'skip': 'Requires authentication',
7a26ce26 847 }, {
a006ce2b 848 # Playlist result only with graphql API
7a26ce26
SS
849 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
850 'playlist_mincount': 2,
851 'info_dict': {
852 'id': '1395079556562706435',
853 'title': str,
854 'tags': [],
855 'uploader': str,
856 'like_count': int,
857 'upload_date': '20210519',
858 'age_limit': 0,
859 'repost_count': int,
147e62fc 860 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
861 'uploader_id': 'Srirachachau',
862 'comment_count': int,
863 'uploader_url': 'https://twitter.com/Srirachachau',
864 'timestamp': 1621447860,
865 },
866 }, {
7a26ce26
SS
867 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
868 'playlist_mincount': 2,
869 'info_dict': {
870 'id': '1578353380363501568',
871 'title': str,
872 'uploader_id': 'DavidToons_',
873 'repost_count': int,
874 'like_count': int,
875 'uploader': str,
876 'timestamp': 1665143744,
877 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 878 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
879 'tags': [],
880 'comment_count': int,
881 'upload_date': '20221007',
882 'age_limit': 0,
883 },
884 }, {
885 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
886 'playlist_count': 2,
887 'info_dict': {
888 'id': '1578401165338976258',
889 'title': str,
890 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
891 'uploader': str,
892 'uploader_id': 'primevideouk',
893 'timestamp': 1665155137,
894 'upload_date': '20221007',
895 'age_limit': 0,
896 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 897 'comment_count': int,
898 'repost_count': int,
7a26ce26
SS
899 'like_count': int,
900 'tags': ['TheRingsOfPower'],
901 },
902 }, {
903 # Twitter Spaces
904 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
905 'info_dict': {
906 'id': '1lPJqmBeeNAJb',
907 'ext': 'm4a',
908 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
909 'uploader': r're:Monique Camarra.+?',
910 'uploader_id': 'MoniqueCamarra',
911 'live_status': 'was_live',
1c16d9df 912 'release_timestamp': 1658417414,
a006ce2b 913 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 914 'timestamp': 1658407771,
915 'release_date': '20220721',
916 'upload_date': '20220721',
7a26ce26
SS
917 },
918 'add_ie': ['TwitterSpaces'],
919 'params': {'skip_download': 'm3u8'},
92315c03 920 'skip': 'Requires authentication',
16bed382 921 }, {
922 # URL specifies video number but --yes-playlist
923 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
924 'playlist_mincount': 2,
925 'info_dict': {
926 'id': '1600649710662213632',
927 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
928 'timestamp': 1670459604.0,
929 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 930 'comment_count': int,
16bed382 931 'uploader_id': 'CTVJLaidlaw',
b03fa783 932 'repost_count': int,
16bed382 933 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
934 'upload_date': '20221208',
935 'age_limit': 0,
936 'uploader': 'Jocelyn Laidlaw',
937 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
938 'like_count': int,
939 },
940 }, {
941 # URL specifies video number and --no-playlist
942 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
943 'info_dict': {
944 'id': '1600649511827013632',
945 'ext': 'mp4',
147e62fc 946 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 947 'thumbnail': r're:^https?://.+\.jpg',
948 'timestamp': 1670459604.0,
949 'uploader_id': 'CTVJLaidlaw',
950 'uploader': 'Jocelyn Laidlaw',
b03fa783 951 'repost_count': int,
952 'comment_count': int,
16bed382 953 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
954 'duration': 102.226,
955 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
956 'display_id': '1600649710662213632',
957 'like_count': int,
958 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
959 'upload_date': '20221208',
960 'age_limit': 0,
1c54a98e 961 '_old_archive_ids': ['twitter 1600649710662213632'],
16bed382 962 },
963 'params': {'noplaylist': True},
7543c9c9 964 }, {
965 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
966 # note the id different between extraction and url
967 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
968 'info_dict': {
969 'id': '1621117577354424321',
970 'display_id': '1621117700482416640',
971 'ext': 'mp4',
972 'title': '뽀 - 아 최우제 이동속도 봐',
973 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
974 'duration': 24.598,
975 'uploader': '뽀',
976 'uploader_id': 's2FAKER',
977 'uploader_url': 'https://twitter.com/s2FAKER',
978 'upload_date': '20230202',
979 'timestamp': 1675339553.0,
980 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
981 'age_limit': 18,
982 'tags': [],
983 'like_count': int,
b03fa783 984 'repost_count': int,
985 'comment_count': int,
1c54a98e 986 '_old_archive_ids': ['twitter 1621117700482416640'],
7543c9c9 987 },
b6795fd3
SS
988 }, {
989 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
990 'info_dict': {
991 'id': '1599108643743473680',
992 'display_id': '1599108751385972737',
993 'ext': 'mp4',
994 'title': '\u06ea - \U0001F48B',
995 'uploader_url': 'https://twitter.com/hlo_again',
996 'like_count': int,
997 'uploader_id': 'hlo_again',
998 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 999 'repost_count': int,
b6795fd3 1000 'duration': 9.531,
b03fa783 1001 'comment_count': int,
b6795fd3
SS
1002 'upload_date': '20221203',
1003 'age_limit': 0,
1004 'timestamp': 1670092210.0,
1005 'tags': [],
1006 'uploader': '\u06ea',
1007 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1c54a98e 1008 '_old_archive_ids': ['twitter 1599108751385972737'],
b6795fd3
SS
1009 },
1010 'params': {'noplaylist': True},
1011 }, {
b6795fd3
SS
1012 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1013 'info_dict': {
1014 'id': '1600009362759733248',
1015 'display_id': '1600009574919962625',
1016 'ext': 'mp4',
1017 'uploader_url': 'https://twitter.com/MunTheShinobi',
1018 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
1019 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1020 'age_limit': 0,
a006ce2b 1021 'uploader': 'Mün',
b03fa783 1022 'repost_count': int,
b6795fd3 1023 'upload_date': '20221206',
a006ce2b 1024 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1025 'comment_count': int,
b6795fd3
SS
1026 'like_count': int,
1027 'tags': [],
1028 'uploader_id': 'MunTheShinobi',
1029 'duration': 139.987,
1030 'timestamp': 1670306984.0,
1c54a98e 1031 '_old_archive_ids': ['twitter 1600009574919962625'],
b6795fd3 1032 },
cf605226 1033 }, {
a006ce2b 1034 # retweeted_status (private)
cf605226 1035 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1036 'info_dict': {
1037 'id': '1623274794488659969',
1038 'display_id': '1623739803874349067',
1039 'ext': 'mp4',
1040 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1041 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1042 'uploader': 'Johnny Bullets',
1043 'uploader_id': 'Johnnybull3ts',
1044 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1045 'age_limit': 0,
1046 'tags': [],
1047 'duration': 8.033,
1048 'timestamp': 1675853859.0,
1049 'upload_date': '20230208',
1050 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1051 'like_count': int,
b03fa783 1052 'repost_count': int,
cf605226 1053 },
6014355c 1054 'skip': 'Protected tweet',
92315c03 1055 }, {
a006ce2b 1056 # retweeted_status
1057 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1058 'info_dict': {
a006ce2b 1059 'id': '1694928337846538240',
92315c03 1060 'ext': 'mp4',
a006ce2b 1061 'display_id': '1695424220702888009',
1062 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1063 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1064 'uploader': 'Benny Johnson',
1065 'uploader_id': 'bennyjohnson',
1066 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1067 'age_limit': 0,
1068 'tags': [],
a006ce2b 1069 'duration': 45.001,
1070 'timestamp': 1692962814.0,
1071 'upload_date': '20230825',
1072 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1073 'like_count': int,
92315c03 1074 'repost_count': int,
1075 'comment_count': int,
1c54a98e 1076 '_old_archive_ids': ['twitter 1695424220702888009'],
92315c03 1077 },
a006ce2b 1078 }, {
1079 # retweeted_status w/ legacy API
1080 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1081 'info_dict': {
1082 'id': '1694928337846538240',
1083 'ext': 'mp4',
1084 'display_id': '1695424220702888009',
1085 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1086 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1087 'uploader': 'Benny Johnson',
1088 'uploader_id': 'bennyjohnson',
1089 'uploader_url': 'https://twitter.com/bennyjohnson',
1090 'age_limit': 0,
1091 'tags': [],
1092 'duration': 45.001,
1093 'timestamp': 1692962814.0,
1094 'upload_date': '20230825',
1095 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1096 'like_count': int,
1097 'repost_count': int,
1c54a98e 1098 '_old_archive_ids': ['twitter 1695424220702888009'],
a006ce2b 1099 },
1100 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1101 }, {
1102 # Broadcast embedded in tweet
1c54a98e 1103 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
a006ce2b 1104 'info_dict': {
1c54a98e 1105 'id': '1rmxPMjLzAXKN',
a006ce2b 1106 'ext': 'mp4',
1c54a98e 1107 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
a006ce2b 1108 'uploader': 'Jessica Dobson',
1c54a98e 1109 'uploader_id': 'JessicaDobsonWX',
1110 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1111 'timestamp': 1701566398,
1112 'upload_date': '20231203',
1113 'live_status': 'was_live',
1114 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1115 'concurrent_view_count': int,
a006ce2b 1116 'view_count': int,
1117 },
1118 'add_ie': ['TwitterBroadcast'],
1119 }, {
1120 # Animated gif and quote tweet video, with syndication API
1121 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1122 'playlist_mincount': 2,
1123 'info_dict': {
1124 'id': '1696256659889565950',
1125 'title': 'BAKOON - https://t.co/zom968d0a0',
1126 'description': 'https://t.co/zom968d0a0',
1127 'tags': [],
1128 'uploader': 'BAKOON',
1129 'uploader_id': 'BAKKOOONN',
1130 'uploader_url': 'https://twitter.com/BAKKOOONN',
1131 'age_limit': 18,
1132 'timestamp': 1693254077.0,
1133 'upload_date': '20230828',
1134 'like_count': int,
1135 },
1136 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
1137 'expected_warnings': ['Not all metadata'],
1c54a98e 1138 }, {
1139 # "stale tweet" with typename "TweetWithVisibilityResults"
1140 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1141 'md5': '62b1e11cdc2cdd0e527f83adb081f536',
1142 'info_dict': {
1143 'id': '1724883339285544960',
1144 'ext': 'mp4',
1145 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1146 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1147 'display_id': '1724884212803834154',
1148 'uploader': 'Robert F. Kennedy Jr',
1149 'uploader_id': 'RobertKennedyJr',
1150 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1151 'upload_date': '20231115',
1152 'timestamp': 1700079417.0,
1153 'duration': 341.048,
1154 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1155 'tags': ['Kennedy24'],
1156 'repost_count': int,
1157 'like_count': int,
1158 'comment_count': int,
1159 'age_limit': 0,
1160 '_old_archive_ids': ['twitter 1724884212803834154'],
1161 },
82fb2357 1162 }, {
1163 # onion route
1164 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1165 'only_matching': True,
18ca61c5
RA
1166 }, {
1167 # Twitch Clip Embed
1168 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1169 'only_matching': True,
10a5091e
RA
1170 }, {
1171 # promo_video_website card
1172 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1173 'only_matching': True,
00dd0cd5 1174 }, {
1175 # promo_video_convo card
1176 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1177 'only_matching': True,
1178 }, {
1179 # appplayer card
1180 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1181 'only_matching': True,
30a074c2 1182 }, {
1183 # video_direct_message card
1184 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1185 'only_matching': True,
1186 }, {
1187 # poll2choice_video card
1188 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1189 'only_matching': True,
1190 }, {
1191 # poll3choice_video card
1192 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1193 'only_matching': True,
1194 }, {
1195 # poll4choice_video card
1196 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1197 'only_matching': True,
cf5881fc 1198 }]
f57f84f6 1199
a006ce2b 1200 _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1201
1202 @property
1203 def _GRAPHQL_ENDPOINT(self):
1204 if self.is_logged_in:
1205 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1206 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1207
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """
    Reshape a GraphQL tweet result so it resembles a legacy-API status dict

    @param data  GraphQL response data to search for the tweet result
    @param twid  tweet id; used to select the matching entry and in warnings
    @returns     the result's 'legacy' dict, updated in place with 'user', 'card',
                 'quoted_status' and 'retweeted_status' where those are found

    Raises ExtractorError if the result contains a tombstone or has the
    'TweetUnavailable' typename.
    """
    if self.is_logged_in:
        # Pick the entry whose entryId is "tweet-<twid>" out of the conversation entries
        tweet_path = (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        )
        result = traverse_obj(data, tweet_path, default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    nested_objects = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_objects)

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweeted_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweeted_user or {}

    # Turn the card's list of {'key': ..., 'value': ...} dicts into one key -> value mapping
    card_bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
1255
1256 def _build_graphql_query(self, media_id):
1257 return {
1258 'variables': {
1259 'focalTweetId': media_id,
1260 'includePromotedContent': True,
1261 'with_rux_injections': False,
1262 'withBirdwatchNotes': True,
1263 'withCommunity': True,
1264 'withDownvotePerspective': False,
1265 'withQuickPromoteEligibilityTweetFields': True,
1266 'withReactionsMetadata': False,
1267 'withReactionsPerspective': False,
1268 'withSuperFollowsTweetFields': True,
1269 'withSuperFollowsUserFields': True,
1270 'withV2Timeline': True,
1271 'withVoice': True,
1272 },
1273 'features': {
1274 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1275 'interactive_text_enabled': True,
1276 'responsive_web_edit_tweet_api_enabled': True,
1277 'responsive_web_enhance_cards_enabled': True,
1278 'responsive_web_graphql_timeline_navigation_enabled': False,
1279 'responsive_web_text_conversations_enabled': False,
1280 'responsive_web_uc_gql_enabled': True,
1281 'standardized_nudges_misinfo': True,
1282 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1283 'tweetypie_unmention_optimization_enabled': True,
1284 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1285 'verified_phone_label_enabled': False,
1286 'vibe_api_enabled': True,
1287 },
92315c03 1288 } if self.is_logged_in else {
1289 'variables': {
1290 'tweetId': media_id,
1291 'withCommunity': False,
1292 'includePromotedContent': False,
1293 'withVoice': False,
1294 },
1295 'features': {
1296 'creator_subscriptions_tweet_preview_api_enabled': True,
1297 'tweetypie_unmention_optimization_enabled': True,
1298 'responsive_web_edit_tweet_api_enabled': True,
1299 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1300 'view_counts_everywhere_api_enabled': True,
1301 'longform_notetweets_consumption_enabled': True,
1302 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1303 'tweet_awards_web_tipping_enabled': False,
1304 'freedom_of_speech_not_reach_fetch_enabled': True,
1305 'standardized_nudges_misinfo': True,
1306 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1307 'longform_notetweets_rich_text_read_enabled': True,
1308 'longform_notetweets_inline_media_enabled': True,
1309 'responsive_web_graphql_exclude_directive_enabled': True,
1310 'verified_phone_label_enabled': False,
1311 'responsive_web_media_download_video_enabled': False,
1312 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1313 'responsive_web_graphql_timeline_navigation_enabled': True,
1314 'responsive_web_enhance_cards_enabled': False
1315 },
1316 'fieldToggles': {
1317 'withArticleRichContentState': False
1318 }
7a26ce26
SS
1319 }
1320
def _call_syndication_api(self, twid):
    """
    Fetch a tweet from the syndication endpoint and adapt it to the legacy/graphql layout

    @param twid  tweet id; sent as the 'id' query parameter and used as the
                 fallback 'id_str' for media whose variant URLs yield no media id
    @returns     the downloaded JSON with an added 'extended_entities' key

    Raises ExtractorError if the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media = []
    for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
        # First variant URL that matches _MEDIA_ID_RE supplies the media id
        media_id = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
        detail['id_str'] = media_id or twid
        media.append(detail)
    status['extended_entities'] = {'media': media}

    return status
6014355c 1342
def _extract_status(self, twid):
    """
    Fetch the status dict for a tweet through the selected API

    GraphQL is used when logged in or when selected; the legacy API when selected.
    An HTTP 429 from either falls back to the syndication endpoint, which is also
    used when it is the selected API.

    @param twid  tweet id
    @returns     the status' 'retweeted_status' dict if present, else the status
                 itself, else an empty dict

    Raises ExtractorError for an unknown API selection; other request errors propagate.
    """
    api = self._selected_api
    if api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif api == 'legacy':
            status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            })
    except ExtractorError as e:
        rate_limited = isinstance(e.cause, HTTPError) and e.cause.status == 429
        if not rate_limited:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1368
def _real_extract(self, url):
    """
    Extract the video(s) attached to or embedded in a tweet

    Returns a single entry, a playlist of entries, a url result pointing at the
    tweet's first expanded URL, or the bare metadata dict when nothing was found.

    Raises ExtractorError if a video number given in the URL is unavailable
    or does not refer to a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', description)
    user = status.get('user') or {}
    uploader, uploader_id = user.get('name'), user.get('screen_name')
    if uploader:
        title = f'{uploader} - {title}'

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-video fields (id, formats, subtitles, thumbnails, ...) from a media dict"""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats, subtitles = [], {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            variant_formats, variant_subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, variant_subs)
            formats.extend(variant_formats)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # One thumbnail per entry in 'sizes', followed by the 'orig' rendition
            named_sizes = [
                *media.get('sizes', {}).items(),
                ('orig', media.get('original_info') or {}),
            ]
            for name, size in named_sizes:
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield result dicts for the media referenced by the tweet's card; yields nothing without a card"""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        bindings = card['binding_values']

        def binding(key):
            # The payload is stored under '<type>_value', where <type> is the lowercased 'type' field
            entry = bindings.get(key) or {}
            return try_get(entry, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': binding('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': binding('url') or binding('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': binding('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{binding("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': binding('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(binding('unified_card'), twid)
            media_entities = traverse_obj(unified_card, ('media_entities', ...), expected_type=dict)
            yield from map(extract_from_video_info, media_entities)
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            if is_amplify:
                vmap_url = binding('amplify_url_vmap')
            else:
                vmap_url = binding('player_stream_url')
            content_id = binding('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = binding('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(binding(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        media_index = int(selected_index) - 1
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') == desired_obj.get('id'):
                if index == 1:
                    info['_old_archive_ids'] = [make_archive_id(self, twid)]
                if len(videos) != 1:
                    info['title'] += f' #{index}'
                break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]

    if not entries:
        # No media found; hand off to the first expanded URL unless it is missing or is this same URL
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1551
1552
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for amp.twimg.com video pages"""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """
        Extract formats and thumbnail from the page's twitter:* meta tags

        @param url  page URL matching _VALID_URL
        @returns    info dict with 'id', 'title', 'formats' and 'thumbnails'
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the twitter:<target>:width / :height meta tags as ints (None if absent)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing formats[0] would raise
        # IndexError instead of returning a result with no formats
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1608
1609
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for `twitter.com/i/broadcasts/<id>` URLs.

    Broadcast metadata is fetched through `_call_api` and parsed with
    `_parse_broadcast_data`; the playlist URL comes from the
    `live_video_stream/status/<media_key>` endpoint.
    """

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for a broadcast URL.

        Raises ExtractorError (expected) when the API returns an empty
        entry for the broadcast. Upcoming broadcasts are returned without
        formats.
        """
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast entry win over the parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(
                broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            # No stream is looked up for a broadcast that has not started
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist's `type` query parameter (if any) becomes the format id
        query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1685
1686
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for `twitter.com/i/spaces/<id>` URLs (audio-only)."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased `state` of a Space to a `live_status` value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` payload for the given Space id."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Return the info dict for a Space URL.

        Requires a logged-in session. Raises ExtractorError (expected) when
        the GraphQL response carries no `audioSpace` data. Spaces that are
        upcoming, ended without replay, or not yet downloadable are reported
        through `raise_no_formats`.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        graphql_response = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = graphql_response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # try_call wraps the lookup so a failure inside the lambda need not abort extraction
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        # The same headers dict is used for the playlist request and returned as `http_headers`
        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(
                f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among `noRedirectPlaybackUrl` and `location`
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}),
                get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # `scheduled_start` and `created_at` are divided by 1000 via `scale`
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1825
1826
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve `t.co` short links (or `tco:<id>` pseudo-URLs) to their target."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever extractor matches.

        For the `tco:<id>` form the real `https://t.co/<id>` URL is built
        first. If the final URL is the "unsafe link" interstitial, the
        wrapped target is used instead.
        """
        mobj = self._match_valid_url(url)
        # `short_id` rather than `id`, to avoid shadowing the builtin
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the curl User-Agent presumably makes t.co answer with
        # a plain HTTP redirect - confirm before changing it
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        # Plain local name: a leading double underscore inside a class body
        # would be name-mangled
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading prefix; str.replace would also remove
            # any later occurrence inside the target URL
            # NOTE(review): the remainder is used as-is; if the interstitial
            # percent-encodes its target it may need unquoting - confirm
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)