]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[core] Warn if lack of ffmpeg alters format selection (#9805)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
a006ce2b 2import random
23e7cba8
S
3import re
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
a006ce2b 7from ..compat import functools # isort: split
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
116c2684 13from ..networking.exceptions import HTTPError
23e7cba8 14from ..utils import (
2edfd745 15 ExtractorError,
13b2ae29 16 dict_get,
92315c03 17 filter_dict,
23e7cba8 18 float_or_none,
13b2ae29 19 format_field,
cf5881fc 20 int_or_none,
13b2ae29 21 make_archive_id,
147e62fc 22 remove_end,
13b2ae29
SS
23 str_or_none,
24 strip_or_none,
f1150b9e 25 traverse_obj,
7a26ce26 26 try_call,
2edfd745 27 try_get,
18ca61c5
RA
28 unified_timestamp,
29 update_url_query,
41d1cca3 30 url_or_none,
2edfd745 31 xpath_text,
23e7cba8
S
32)
33
34
class TwitterBaseIE(InfoExtractor):
    """Common base for the Twitter extractors.

    Holds the API endpoints and bearer tokens, the login flow against
    ``onboarding/task.json``, guest-token retrieval, the generic API caller
    and helpers that turn media variants / VMAP documents into formats.
    """
    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api after every successful login step
    _flow_token = None

    # Request body sent with the first login call (flow_name=login)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        @param variant   mapping with a 'url' key and optionally
                         'bitrate' / 'bit_rate'
        @param video_id  ID used for the HLS manifest download
        @returns         (formats, subtitles); both empty if the variant
                         has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
            # Audio-only HLS formats without a bitrate: recover it from the format_id
            for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None):
                if mobj := re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', f['format_id']):
                    f['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
            return fmts, subs
        else:
            # `or None` turns a bitrate of 0 into "unknown"
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract all formats it lists.

        @returns  (formats, subtitles); both empty if vmap_url is not a
                  valid URL
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded; decode it in place before use
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also use the MediaFile URL unless a variant already covered it
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on a_format from a '/<W>x<H>/' URL segment, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether the cookies for _API_BASE contain an 'auth_token' cookie."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        """First value of the 'api' extractor argument of 'Twitter' (default: 'graphql')."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from guest/activate.json.

        @raises ExtractorError  if the response carries no string 'guest_token'
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Return the Authorization header plus 'x-csrf-token' when a 'ct0' cookie exists.

        The legacy bearer token is only used when `legacy` is true and the
        session is not logged in.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow and return the next subtask ID.

        Stores the returned 'flow_token' in self._flow_token.

        @param query  optional query parameters (None means no parameters)
        @raises ExtractorError  if the API reports an error, the status is
                                not 'success' or no subtask is returned
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering login subtasks until an auth_token cookie is set.

        Does nothing if already logged in. Raises ExtractorError on an
        unrecognized subtask or when the flow reports success without
        granting the cookie; captcha and rejected attempts are reported
        through raise_login_required.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page; fall back to the API
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token of the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Reached only while still not logged in (loop condition)
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or the GraphQL API and return the decoded JSON.

        @param path     path appended to _GRAPHQL_API_BASE or _API_BASE
        @param query    optional query parameters (None means no parameters)
        @param graphql  whether to use the GraphQL API base
        @raises ExtractorError  if the response contains 'errors'; messages
                                containing 'not authorized' are reported
                                through raise_login_required instead
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys drops duplicate messages but, unlike set(), keeps
            # their order, so the reported text is deterministic
            errors = ', '.join(dict.fromkeys(
                traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query data for media_id; subclasses must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the 'data' member of the response."""
        data = self._build_graphql_query(media_id)
        # Every top-level value is sent as a compact JSON string
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
337
338
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card / embedded video URLs.

    The numeric ID is taken from the URL and extraction is delegated to
    TwitterIE through the matching ``/statuses/<id>`` URL.
    """
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'}
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the matched ID over to TwitterIE via its /statuses/ URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 463
03879ff0 464
18ca61c5 465class TwitterIE(TwitterBaseIE):
014e8803 466 IE_NAME = 'twitter'
b6795fd3 467 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 468
cf5881fc 469 _TESTS = [{
48aae2d2 470 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 471 'info_dict': {
13b2ae29
SS
472 'id': '643211870443208704',
473 'display_id': '643211948184596480',
f57f84f6 474 'ext': 'mp4',
575036b4 475 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 476 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 477 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
55f18333 478 'channel_id': '549749560',
48aae2d2
YCH
479 'uploader': 'FREE THE NIPPLE',
480 'uploader_id': 'freethenipple',
3b65a6fb 481 'duration': 12.922,
18ca61c5
RA
482 'timestamp': 1442188653,
483 'upload_date': '20150913',
13b2ae29 484 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 485 'comment_count': int,
486 'repost_count': int,
13b2ae29
SS
487 'like_count': int,
488 'tags': [],
489 'age_limit': 18,
1c54a98e 490 '_old_archive_ids': ['twitter 643211948184596480'],
f57f84f6 491 },
55f18333 492 'skip': 'Requires authentication',
cf5881fc
YCH
493 }, {
494 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
495 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
496 'info_dict': {
497 'id': '657991469417025536',
498 'ext': 'mp4',
499 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
500 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 501 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
502 'uploader': 'Gifs',
503 'uploader_id': 'giphz',
504 },
7efc1c2b 505 'expected_warnings': ['height', 'width'],
fc0a45fa 506 'skip': 'Account suspended',
b703ebee
JMF
507 }, {
508 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
509 'info_dict': {
510 'id': '665052190608723968',
13b2ae29 511 'display_id': '665052190608723968',
b703ebee 512 'ext': 'mp4',
b6795fd3 513 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 514 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
55f18333 515 'channel_id': '20106852',
b703ebee 516 'uploader_id': 'starwars',
7a26ce26 517 'uploader': r're:Star Wars.*',
18ca61c5
RA
518 'timestamp': 1447395772,
519 'upload_date': '20151113',
13b2ae29 520 'uploader_url': 'https://twitter.com/starwars',
b03fa783 521 'comment_count': int,
522 'repost_count': int,
13b2ae29
SS
523 'like_count': int,
524 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
525 'age_limit': 0,
1c54a98e 526 '_old_archive_ids': ['twitter 665052190608723968'],
b703ebee 527 },
0ae937a7
YCH
528 }, {
529 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
530 'info_dict': {
531 'id': '705235433198714880',
532 'ext': 'mp4',
18ca61c5
RA
533 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
534 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
535 'uploader_id': 'BTNBrentYarina',
536 'uploader': 'Brent Yarina',
18ca61c5
RA
537 'timestamp': 1456976204,
538 'upload_date': '20160303',
13b2ae29
SS
539 'uploader_url': 'https://twitter.com/BTNBrentYarina',
540 'comment_count': int,
541 'repost_count': int,
542 'like_count': int,
543 'tags': [],
544 'age_limit': 0,
0ae937a7
YCH
545 },
546 'params': {
547 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
548 # Test case of TwitterCardIE
549 'skip_download': True,
550 },
352e7d98 551 'skip': 'Dead external link',
03879ff0
YCH
552 }, {
553 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 554 'info_dict': {
13b2ae29
SS
555 'id': '700207414000242688',
556 'display_id': '700207533655363584',
03879ff0 557 'ext': 'mp4',
13b2ae29 558 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 559 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 560 'thumbnail': r're:^https?://.*\.jpg',
55f18333 561 'channel_id': '1383165541',
13b2ae29
SS
562 'uploader': 'jaydin donte geer',
563 'uploader_id': 'jaydingeer',
3b65a6fb 564 'duration': 30.0,
18ca61c5
RA
565 'timestamp': 1455777459,
566 'upload_date': '20160218',
13b2ae29 567 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 568 'comment_count': int,
569 'repost_count': int,
13b2ae29
SS
570 'like_count': int,
571 'tags': ['Damndaniel'],
572 'age_limit': 0,
1c54a98e 573 '_old_archive_ids': ['twitter 700207533655363584'],
03879ff0 574 },
395fd4b0
YCH
575 }, {
576 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
577 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
578 'info_dict': {
579 'id': 'MIOxnrUteUd',
580 'ext': 'mp4',
18ca61c5
RA
581 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
582 'uploader': 'TAKUMA',
583 'uploader_id': '1004126642786242560',
3615bfe1 584 'timestamp': 1402826626,
395fd4b0 585 'upload_date': '20140615',
13b2ae29
SS
586 'thumbnail': r're:^https?://.*\.jpg',
587 'alt_title': 'Vine by TAKUMA',
588 'comment_count': int,
589 'repost_count': int,
590 'like_count': int,
591 'view_count': int,
395fd4b0
YCH
592 },
593 'add_ie': ['Vine'],
36b7d9db
YCH
594 }, {
595 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 596 'info_dict': {
13b2ae29
SS
597 'id': '717462543795523584',
598 'display_id': '719944021058060289',
36b7d9db
YCH
599 'ext': 'mp4',
600 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5 601 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
55f18333 602 'channel_id': '701615052',
18ca61c5 603 'uploader_id': 'CaptainAmerica',
36b7d9db 604 'uploader': 'Captain America',
3b65a6fb 605 'duration': 3.17,
18ca61c5
RA
606 'timestamp': 1460483005,
607 'upload_date': '20160412',
13b2ae29
SS
608 'uploader_url': 'https://twitter.com/CaptainAmerica',
609 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 610 'comment_count': int,
611 'repost_count': int,
13b2ae29
SS
612 'like_count': int,
613 'tags': [],
614 'age_limit': 0,
1c54a98e 615 '_old_archive_ids': ['twitter 719944021058060289'],
36b7d9db 616 },
f0bc5a86
YCH
617 }, {
618 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
619 'info_dict': {
620 'id': '1zqKVVlkqLaKB',
621 'ext': 'mp4',
18ca61c5 622 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 623 'upload_date': '20160923',
18ca61c5
RA
624 'uploader_id': '1PmKqpJdOJQoY',
625 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 626 'timestamp': 1474613214,
13b2ae29 627 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
628 },
629 'add_ie': ['Periscope'],
1c54a98e 630 'skip': 'Broadcast not found',
2edfd745
YCH
631 }, {
632 # has mp4 formats via mobile API
633 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
634 'info_dict': {
6014355c 635 'id': '852077943283097602',
2edfd745
YCH
636 'ext': 'mp4',
637 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 638 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
55f18333 639 'channel_id': '2526757026',
2edfd745
YCH
640 'uploader': 'عالم الأخبار',
641 'uploader_id': 'news_al3alm',
3b65a6fb 642 'duration': 277.4,
18ca61c5
RA
643 'timestamp': 1492000653,
644 'upload_date': '20170412',
6014355c 645 'display_id': '852138619213144067',
646 'age_limit': 0,
647 'uploader_url': 'https://twitter.com/news_al3alm',
648 'thumbnail': r're:^https?://.*\.jpg',
649 'tags': [],
650 'repost_count': int,
6014355c 651 'like_count': int,
652 'comment_count': int,
1c54a98e 653 '_old_archive_ids': ['twitter 852138619213144067'],
2edfd745 654 },
5c1452e8
GF
655 }, {
656 'url': 'https://twitter.com/i/web/status/910031516746514432',
657 'info_dict': {
13b2ae29
SS
658 'id': '910030238373089285',
659 'display_id': '910031516746514432',
5c1452e8
GF
660 'ext': 'mp4',
661 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
662 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 663 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
55f18333 664 'channel_id': '2319432498',
5c1452e8
GF
665 'uploader': 'Préfet de Guadeloupe',
666 'uploader_id': 'Prefet971',
667 'duration': 47.48,
18ca61c5
RA
668 'timestamp': 1505803395,
669 'upload_date': '20170919',
13b2ae29 670 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 671 'comment_count': int,
672 'repost_count': int,
13b2ae29
SS
673 'like_count': int,
674 'tags': ['Maria'],
675 'age_limit': 0,
1c54a98e 676 '_old_archive_ids': ['twitter 910031516746514432'],
5c1452e8
GF
677 },
678 'params': {
679 'skip_download': True, # requires ffmpeg
680 },
2593725a
S
681 }, {
682 # card via api.twitter.com/1.1/videos/tweet/config
683 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
684 'info_dict': {
13b2ae29
SS
685 'id': '1001551417340022785',
686 'display_id': '1001551623938805763',
2593725a
S
687 'ext': 'mp4',
688 'title': 're:.*?Shep is on a roll today.*?',
689 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 690 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
55f18333 691 'channel_id': '255036353',
2593725a
S
692 'uploader': 'Lis Power',
693 'uploader_id': 'LisPower1',
694 'duration': 111.278,
18ca61c5
RA
695 'timestamp': 1527623489,
696 'upload_date': '20180529',
13b2ae29 697 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 698 'comment_count': int,
699 'repost_count': int,
13b2ae29
SS
700 'like_count': int,
701 'tags': [],
702 'age_limit': 0,
1c54a98e 703 '_old_archive_ids': ['twitter 1001551623938805763'],
2593725a
S
704 },
705 'params': {
706 'skip_download': True, # requires ffmpeg
707 },
b7ef93f0
S
708 }, {
709 'url': 'https://twitter.com/foobar/status/1087791357756956680',
710 'info_dict': {
13b2ae29
SS
711 'id': '1087791272830607360',
712 'display_id': '1087791357756956680',
b7ef93f0 713 'ext': 'mp4',
6014355c 714 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 715 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 716 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 717 'uploader': 'X',
718 'uploader_id': 'X',
b7ef93f0 719 'duration': 61.567,
18ca61c5
RA
720 'timestamp': 1548184644,
721 'upload_date': '20190122',
6014355c 722 'uploader_url': 'https://twitter.com/X',
b03fa783 723 'comment_count': int,
724 'repost_count': int,
13b2ae29 725 'like_count': int,
b03fa783 726 'view_count': int,
13b2ae29
SS
727 'tags': [],
728 'age_limit': 0,
18ca61c5 729 },
a006ce2b 730 'skip': 'This Tweet is unavailable',
18ca61c5
RA
731 }, {
732 # not available in Periscope
733 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
734 'info_dict': {
735 'id': '1vOGwqejwoWxB',
736 'ext': 'mp4',
737 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
738 'uploader': 'Vivi',
739 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
740 'thumbnail': r're:^https?://.*\.jpg',
741 'tags': ['EduTECH2019'],
742 'view_count': int,
b7ef93f0 743 },
18ca61c5 744 'add_ie': ['TwitterBroadcast'],
a006ce2b 745 'skip': 'Broadcast no longer exists',
30a074c2 746 }, {
747 # unified card
748 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
749 'info_dict': {
13b2ae29
SS
750 'id': '1349774757969989634',
751 'display_id': '1349794411333394432',
30a074c2 752 'ext': 'mp4',
753 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
754 'thumbnail': r're:^https?://.*\.jpg',
755 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
55f18333 756 'channel_id': '18552281',
30a074c2 757 'uploader': 'Brooklyn Nets',
758 'uploader_id': 'BrooklynNets',
759 'duration': 324.484,
760 'timestamp': 1610651040,
761 'upload_date': '20210114',
13b2ae29 762 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 763 'comment_count': int,
764 'repost_count': int,
13b2ae29
SS
765 'like_count': int,
766 'tags': [],
767 'age_limit': 0,
1c54a98e 768 '_old_archive_ids': ['twitter 1349794411333394432'],
30a074c2 769 },
770 'params': {
771 'skip_download': True,
772 },
13b2ae29
SS
773 }, {
774 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
775 'info_dict': {
776 'id': '1577855447914409984',
777 'display_id': '1577855540407197696',
778 'ext': 'mp4',
55f18333 779 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
352e7d98 780 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 781 'upload_date': '20221006',
55f18333 782 'channel_id': '143077138',
783 'uploader': 'Oshtru',
13b2ae29
SS
784 'uploader_id': 'oshtru',
785 'uploader_url': 'https://twitter.com/oshtru',
786 'thumbnail': r're:^https?://.*\.jpg',
787 'duration': 30.03,
7a26ce26 788 'timestamp': 1665025050,
b03fa783 789 'comment_count': int,
790 'repost_count': int,
13b2ae29
SS
791 'like_count': int,
792 'tags': [],
793 'age_limit': 0,
1c54a98e 794 '_old_archive_ids': ['twitter 1577855540407197696'],
13b2ae29
SS
795 },
796 'params': {'skip_download': True},
797 }, {
798 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
799 'info_dict': {
800 'id': '1577719286659006464',
55f18333 801 'title': 'Ultima Reload - Test',
13b2ae29 802 'description': 'Test https://t.co/Y3KEZD7Dad',
55f18333 803 'channel_id': '168922496',
804 'uploader': 'Ultima Reload',
13b2ae29
SS
805 'uploader_id': 'UltimaShadowX',
806 'uploader_url': 'https://twitter.com/UltimaShadowX',
807 'upload_date': '20221005',
7a26ce26 808 'timestamp': 1664992565,
b03fa783 809 'comment_count': int,
810 'repost_count': int,
13b2ae29
SS
811 'like_count': int,
812 'tags': [],
813 'age_limit': 0,
814 },
815 'playlist_count': 4,
816 'params': {'skip_download': True},
7a26ce26
SS
817 }, {
818 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
819 'info_dict': {
820 'id': '1575559336759263233',
821 'display_id': '1575560063510810624',
822 'ext': 'mp4',
823 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
824 'thumbnail': r're:^https?://.*\.jpg',
825 'description': 'md5:95aea692fda36a12081b9629b02daa92',
55f18333 826 'channel_id': '1094109584',
7a26ce26
SS
827 'uploader': 'Max Olson',
828 'uploader_id': 'MesoMax919',
829 'uploader_url': 'https://twitter.com/MesoMax919',
830 'duration': 21.321,
831 'timestamp': 1664477766,
832 'upload_date': '20220929',
b03fa783 833 'comment_count': int,
834 'repost_count': int,
7a26ce26
SS
835 'like_count': int,
836 'tags': ['HurricaneIan'],
837 'age_limit': 0,
1c54a98e 838 '_old_archive_ids': ['twitter 1575560063510810624'],
7a26ce26
SS
839 },
840 }, {
a006ce2b 841 # Adult content, fails if not logged in
7a26ce26
SS
842 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
843 'info_dict': {
844 'id': '1575199163847000068',
845 'display_id': '1575199173472927762',
846 'ext': 'mp4',
847 'title': str,
848 'description': str,
55f18333 849 'channel_id': '1217167793541480450',
7a26ce26
SS
850 'uploader': str,
851 'uploader_id': 'Rizdraws',
852 'uploader_url': 'https://twitter.com/Rizdraws',
853 'upload_date': '20220928',
854 'timestamp': 1664391723,
16bed382 855 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
856 'like_count': int,
857 'repost_count': int,
858 'comment_count': int,
859 'age_limit': 18,
55f18333 860 'tags': [],
861 '_old_archive_ids': ['twitter 1575199173472927762'],
7a26ce26 862 },
a006ce2b 863 'params': {'skip_download': 'The media could not be played'},
147e62fc 864 'skip': 'Requires authentication',
7a26ce26 865 }, {
a006ce2b 866 # Playlist result only with graphql API
7a26ce26
SS
867 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
868 'playlist_mincount': 2,
869 'info_dict': {
870 'id': '1395079556562706435',
871 'title': str,
872 'tags': [],
55f18333 873 'channel_id': '21539378',
7a26ce26
SS
874 'uploader': str,
875 'like_count': int,
876 'upload_date': '20210519',
877 'age_limit': 0,
878 'repost_count': int,
147e62fc 879 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
880 'uploader_id': 'Srirachachau',
881 'comment_count': int,
882 'uploader_url': 'https://twitter.com/Srirachachau',
883 'timestamp': 1621447860,
884 },
885 }, {
7a26ce26
SS
886 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
887 'playlist_mincount': 2,
888 'info_dict': {
889 'id': '1578353380363501568',
890 'title': str,
55f18333 891 'channel_id': '2195866214',
7a26ce26
SS
892 'uploader_id': 'DavidToons_',
893 'repost_count': int,
894 'like_count': int,
895 'uploader': str,
896 'timestamp': 1665143744,
897 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 898 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
899 'tags': [],
900 'comment_count': int,
901 'upload_date': '20221007',
902 'age_limit': 0,
903 },
904 }, {
905 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
906 'playlist_count': 2,
907 'info_dict': {
908 'id': '1578401165338976258',
909 'title': str,
910 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
55f18333 911 'channel_id': '19338359',
7a26ce26
SS
912 'uploader': str,
913 'uploader_id': 'primevideouk',
914 'timestamp': 1665155137,
915 'upload_date': '20221007',
916 'age_limit': 0,
917 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 918 'comment_count': int,
919 'repost_count': int,
7a26ce26
SS
920 'like_count': int,
921 'tags': ['TheRingsOfPower'],
922 },
923 }, {
924 # Twitter Spaces
925 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
926 'info_dict': {
927 'id': '1lPJqmBeeNAJb',
928 'ext': 'm4a',
929 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
930 'uploader': r're:Monique Camarra.+?',
931 'uploader_id': 'MoniqueCamarra',
932 'live_status': 'was_live',
1c16d9df 933 'release_timestamp': 1658417414,
a006ce2b 934 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 935 'timestamp': 1658407771,
936 'release_date': '20220721',
937 'upload_date': '20220721',
7a26ce26
SS
938 },
939 'add_ie': ['TwitterSpaces'],
940 'params': {'skip_download': 'm3u8'},
92315c03 941 'skip': 'Requires authentication',
16bed382 942 }, {
943 # URL specifies video number but --yes-playlist
944 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
945 'playlist_mincount': 2,
946 'info_dict': {
947 'id': '1600649710662213632',
948 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
949 'timestamp': 1670459604.0,
950 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 951 'comment_count': int,
16bed382 952 'uploader_id': 'CTVJLaidlaw',
55f18333 953 'channel_id': '80082014',
b03fa783 954 'repost_count': int,
16bed382 955 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
956 'upload_date': '20221208',
957 'age_limit': 0,
958 'uploader': 'Jocelyn Laidlaw',
959 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
960 'like_count': int,
961 },
962 }, {
963 # URL specifies video number and --no-playlist
964 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
965 'info_dict': {
966 'id': '1600649511827013632',
967 'ext': 'mp4',
147e62fc 968 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 969 'thumbnail': r're:^https?://.+\.jpg',
970 'timestamp': 1670459604.0,
55f18333 971 'channel_id': '80082014',
16bed382 972 'uploader_id': 'CTVJLaidlaw',
973 'uploader': 'Jocelyn Laidlaw',
b03fa783 974 'repost_count': int,
975 'comment_count': int,
16bed382 976 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
977 'duration': 102.226,
978 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
979 'display_id': '1600649710662213632',
980 'like_count': int,
981 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
982 'upload_date': '20221208',
983 'age_limit': 0,
1c54a98e 984 '_old_archive_ids': ['twitter 1600649710662213632'],
16bed382 985 },
986 'params': {'noplaylist': True},
7543c9c9 987 }, {
988 # id points to a TweetWithVisibilityResults-type entity, which wraps the actual Tweet
989 # note that the extracted id differs from the id in the url
990 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
991 'info_dict': {
992 'id': '1621117577354424321',
993 'display_id': '1621117700482416640',
994 'ext': 'mp4',
995 'title': '뽀 - 아 최우제 이동속도 봐',
996 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
997 'duration': 24.598,
55f18333 998 'channel_id': '1281839411068432384',
7543c9c9 999 'uploader': '뽀',
1000 'uploader_id': 's2FAKER',
1001 'uploader_url': 'https://twitter.com/s2FAKER',
1002 'upload_date': '20230202',
1003 'timestamp': 1675339553.0,
1004 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
1005 'age_limit': 18,
1006 'tags': [],
1007 'like_count': int,
b03fa783 1008 'repost_count': int,
1009 'comment_count': int,
1c54a98e 1010 '_old_archive_ids': ['twitter 1621117700482416640'],
7543c9c9 1011 },
55f18333 1012 'skip': 'Requires authentication',
b6795fd3
SS
1013 }, {
1014 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1015 'info_dict': {
1016 'id': '1599108643743473680',
1017 'display_id': '1599108751385972737',
1018 'ext': 'mp4',
1019 'title': '\u06ea - \U0001F48B',
55f18333 1020 'channel_id': '1347791436809441283',
b6795fd3
SS
1021 'uploader_url': 'https://twitter.com/hlo_again',
1022 'like_count': int,
1023 'uploader_id': 'hlo_again',
1024 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 1025 'repost_count': int,
b6795fd3 1026 'duration': 9.531,
b03fa783 1027 'comment_count': int,
b6795fd3
SS
1028 'upload_date': '20221203',
1029 'age_limit': 0,
1030 'timestamp': 1670092210.0,
1031 'tags': [],
1032 'uploader': '\u06ea',
1033 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1c54a98e 1034 '_old_archive_ids': ['twitter 1599108751385972737'],
b6795fd3
SS
1035 },
1036 'params': {'noplaylist': True},
1037 }, {
b6795fd3
SS
1038 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1039 'info_dict': {
1040 'id': '1600009362759733248',
1041 'display_id': '1600009574919962625',
1042 'ext': 'mp4',
55f18333 1043 'channel_id': '211814412',
b6795fd3
SS
1044 'uploader_url': 'https://twitter.com/MunTheShinobi',
1045 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
1046 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1047 'age_limit': 0,
a006ce2b 1048 'uploader': 'Mün',
b03fa783 1049 'repost_count': int,
b6795fd3 1050 'upload_date': '20221206',
a006ce2b 1051 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1052 'comment_count': int,
b6795fd3
SS
1053 'like_count': int,
1054 'tags': [],
1055 'uploader_id': 'MunTheShinobi',
1056 'duration': 139.987,
1057 'timestamp': 1670306984.0,
1c54a98e 1058 '_old_archive_ids': ['twitter 1600009574919962625'],
b6795fd3 1059 },
cf605226 1060 }, {
a006ce2b 1061 # retweeted_status (private)
cf605226 1062 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1063 'info_dict': {
1064 'id': '1623274794488659969',
1065 'display_id': '1623739803874349067',
1066 'ext': 'mp4',
1067 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1068 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1069 'uploader': 'Johnny Bullets',
1070 'uploader_id': 'Johnnybull3ts',
1071 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1072 'age_limit': 0,
1073 'tags': [],
1074 'duration': 8.033,
1075 'timestamp': 1675853859.0,
1076 'upload_date': '20230208',
1077 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1078 'like_count': int,
b03fa783 1079 'repost_count': int,
cf605226 1080 },
6014355c 1081 'skip': 'Protected tweet',
92315c03 1082 }, {
a006ce2b 1083 # retweeted_status
1084 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1085 'info_dict': {
a006ce2b 1086 'id': '1694928337846538240',
92315c03 1087 'ext': 'mp4',
a006ce2b 1088 'display_id': '1695424220702888009',
1089 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1090 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1091 'channel_id': '15212187',
a006ce2b 1092 'uploader': 'Benny Johnson',
1093 'uploader_id': 'bennyjohnson',
1094 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1095 'age_limit': 0,
1096 'tags': [],
a006ce2b 1097 'duration': 45.001,
1098 'timestamp': 1692962814.0,
1099 'upload_date': '20230825',
1100 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1101 'like_count': int,
92315c03 1102 'repost_count': int,
1103 'comment_count': int,
1c54a98e 1104 '_old_archive_ids': ['twitter 1695424220702888009'],
92315c03 1105 },
a006ce2b 1106 }, {
1107 # retweeted_status w/ legacy API
1108 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1109 'info_dict': {
1110 'id': '1694928337846538240',
1111 'ext': 'mp4',
1112 'display_id': '1695424220702888009',
1113 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1114 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
55f18333 1115 'channel_id': '15212187',
a006ce2b 1116 'uploader': 'Benny Johnson',
1117 'uploader_id': 'bennyjohnson',
1118 'uploader_url': 'https://twitter.com/bennyjohnson',
1119 'age_limit': 0,
1120 'tags': [],
1121 'duration': 45.001,
1122 'timestamp': 1692962814.0,
1123 'upload_date': '20230825',
1124 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1125 'like_count': int,
1126 'repost_count': int,
1c54a98e 1127 '_old_archive_ids': ['twitter 1695424220702888009'],
a006ce2b 1128 },
1129 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1130 }, {
1131 # Broadcast embedded in tweet
1c54a98e 1132 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
a006ce2b 1133 'info_dict': {
1c54a98e 1134 'id': '1rmxPMjLzAXKN',
a006ce2b 1135 'ext': 'mp4',
1c54a98e 1136 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
a006ce2b 1137 'uploader': 'Jessica Dobson',
1c54a98e 1138 'uploader_id': 'JessicaDobsonWX',
1139 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1140 'timestamp': 1701566398,
1141 'upload_date': '20231203',
1142 'live_status': 'was_live',
1143 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1144 'concurrent_view_count': int,
a006ce2b 1145 'view_count': int,
1146 },
1147 'add_ie': ['TwitterBroadcast'],
1148 }, {
55f18333 1149 # Animated gif and quote tweet video
a006ce2b 1150 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1151 'playlist_mincount': 2,
1152 'info_dict': {
1153 'id': '1696256659889565950',
1154 'title': 'BAKOON - https://t.co/zom968d0a0',
1155 'description': 'https://t.co/zom968d0a0',
1156 'tags': [],
55f18333 1157 'channel_id': '1263540390',
a006ce2b 1158 'uploader': 'BAKOON',
1159 'uploader_id': 'BAKKOOONN',
1160 'uploader_url': 'https://twitter.com/BAKKOOONN',
1161 'age_limit': 18,
1162 'timestamp': 1693254077.0,
1163 'upload_date': '20230828',
1164 'like_count': int,
55f18333 1165 'comment_count': int,
1166 'repost_count': int,
a006ce2b 1167 },
55f18333 1168 'skip': 'Requires authentication',
1c54a98e 1169 }, {
1170 # "stale tweet" with typename "TweetWithVisibilityResults"
1171 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
55f18333 1172 'md5': '511377ff8dfa7545307084dca4dce319',
1c54a98e 1173 'info_dict': {
1174 'id': '1724883339285544960',
1175 'ext': 'mp4',
1176 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1177 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1178 'display_id': '1724884212803834154',
55f18333 1179 'channel_id': '337808606',
1c54a98e 1180 'uploader': 'Robert F. Kennedy Jr',
1181 'uploader_id': 'RobertKennedyJr',
1182 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1183 'upload_date': '20231115',
1184 'timestamp': 1700079417.0,
1185 'duration': 341.048,
1186 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1187 'tags': ['Kennedy24'],
1188 'repost_count': int,
1189 'like_count': int,
1190 'comment_count': int,
1191 'age_limit': 0,
1192 '_old_archive_ids': ['twitter 1724884212803834154'],
1193 },
82fb2357 1194 }, {
1195 # onion route
1196 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1197 'only_matching': True,
18ca61c5
RA
1198 }, {
1199 # Twitch Clip Embed
1200 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1201 'only_matching': True,
10a5091e
RA
1202 }, {
1203 # promo_video_website card
1204 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1205 'only_matching': True,
00dd0cd5 1206 }, {
1207 # promo_video_convo card
1208 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1209 'only_matching': True,
1210 }, {
1211 # appplayer card
1212 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1213 'only_matching': True,
30a074c2 1214 }, {
1215 # video_direct_message card
1216 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1217 'only_matching': True,
1218 }, {
1219 # poll2choice_video card
1220 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1221 'only_matching': True,
1222 }, {
1223 # poll3choice_video card
1224 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1225 'only_matching': True,
1226 }, {
1227 # poll4choice_video card
1228 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1229 'only_matching': True,
cf5881fc 1230 }]
f57f84f6 1231
a006ce2b 1232 _MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1233
@property
def _GRAPHQL_ENDPOINT(self):
    """Return the GraphQL endpoint path matching the current login state.

    Logged-in sessions get the ``TweetDetail`` endpoint; all other sessions
    get ``TweetResultByRestId``.
    """
    return (
        'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' if self.is_logged_in
        else '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId')
1239
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-style status dict.

    ``data`` is the GraphQL payload and ``twid`` the requested tweet id.
    The tweet result is looked up differently depending on
    ``self.is_logged_in``, since the two endpoints return different layouts.

    Raises ExtractorError when the result carries a tombstone or has the
    ``TweetUnavailable`` typename; for the ``NsfwLoggedOut`` and ``Protected``
    reasons ``self.raise_login_required`` is called first.

    Returns the result's ``legacy`` dict, updated in place with ``user``,
    ``card``, ``quoted_status`` and ``retweeted_status`` entries.
    """
    if self.is_logged_in:
        # Pick the timeline entry whose entryId matches the requested tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    if typename == 'TweetWithVisibilityResults':
        # Result for "stale tweet" needs additional transformation
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    extra_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(extra_fields)

    # extra transformations needed since result does not match legacy format
    retweeted = status.get('retweeted_status')
    if retweeted:
        retweeted['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Collapse the card's list of {key, value} bindings into a single mapping
    bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        bindings[binding.get('key')] = binding.get('value')
    if bindings:
        status['card']['binding_values'] = bindings

    return status
1287
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for the tweet ``media_id``.

    Returns a dict with ``variables`` and ``features``; the payload used when
    not logged in additionally carries ``fieldToggles``. Which payload is
    built depends on ``self.is_logged_in``.
    """
    if self.is_logged_in:
        return {
            'variables': {
                'focalTweetId': media_id,
                'includePromotedContent': True,
                'with_rux_injections': False,
                'withBirdwatchNotes': True,
                'withCommunity': True,
                'withDownvotePerspective': False,
                'withQuickPromoteEligibilityTweetFields': True,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withSuperFollowsTweetFields': True,
                'withSuperFollowsUserFields': True,
                'withV2Timeline': True,
                'withVoice': True,
            },
            'features': {
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_graphql_timeline_navigation_enabled': False,
                'responsive_web_text_conversations_enabled': False,
                'responsive_web_uc_gql_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'tweetypie_unmention_optimization_enabled': True,
                'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
                'verified_phone_label_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    # Payload used when not logged in
    return {
        'variables': {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        },
        'features': {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        },
        'fieldToggles': {
            'withArticleRichContentState': False,
        },
    }
1352
def _call_syndication_api(self, twid):
    """Download tweet ``twid`` from the syndication endpoint.

    The response is adjusted so its structure matches that of the
    legacy/graphql APIs: every media detail of the tweet (and of its quoted
    tweet) receives an ``id_str`` and the details are exposed under
    ``extended_entities.media``.

    Raises ExtractorError if the endpoint returns an empty JSON response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
    token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
    query = {
        'id': twid,
        'token': token,
    }
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query=query)
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media = []
    for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
        # Take the media id from the first variant URL matching _MEDIA_ID_RE,
        # falling back to the tweet id
        variant_media_id = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
        detail['id_str'] = variant_media_id or twid
        media.append(detail)
    status['extended_entities'] = {'media': media}

    return status
6014355c 1374
def _extract_status(self, twid):
    """Fetch the status dict for tweet ``twid`` using the selected API.

    ``self._selected_api`` must be one of ``graphql``, ``legacy`` or
    ``syndication``; any other value raises ExtractorError. A logged-in
    session uses the GraphQL API even if ``legacy`` is selected. If the
    GraphQL/legacy request fails with HTTP 429, the syndication endpoint is
    used as a fallback.

    When ``syndication`` is explicitly selected it is the only API queried.
    Previously a logged-in session issued a GraphQL request first, whose
    result was always discarded and whose errors could abort the extraction.

    Returns the ``retweeted_status`` dict if the status has one, otherwise
    the status itself (or ``{}`` if neither is a non-empty dict).
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)
    else:
        try:
            if self.is_logged_in or self._selected_api == 'graphql':
                status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
            else:
                # Only 'legacy' remains after the validation above
                status = self._call_api(f'statuses/show/{twid}.json', twid, {
                    'cards_platform': 'Web-12',
                    'include_cards': 1,
                    'include_reply_count': 1,
                    'include_user_entities': 0,
                    'tweet_mode': 'extended',
                })
        except ExtractorError as e:
            # Only a rate-limit response triggers the fallback; re-raise anything else
            if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
                raise
            self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
            status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1400
1401 def _real_extract(self, url):
1402 twid, selected_index = self._match_valid_url(url).group('id', 'index')
1403 status = self._extract_status(twid)
575036b4 1404
92315c03 1405 title = description = traverse_obj(
1406 status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
575036b4 1407 # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
b703ebee 1408 title = re.sub(r'\s+(https?://[^ ]+)', '', title)
18ca61c5
RA
1409 user = status.get('user') or {}
1410 uploader = user.get('name')
1411 if uploader:
7a26ce26 1412 title = f'{uploader} - {title}'
18ca61c5
RA
1413 uploader_id = user.get('screen_name')
1414
cf5881fc 1415 info = {
18ca61c5
RA
1416 'id': twid,
1417 'title': title,
1418 'description': description,
1419 'uploader': uploader,
1420 'timestamp': unified_timestamp(status.get('created_at')),
55f18333 1421 'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
18ca61c5 1422 'uploader_id': uploader_id,
a70635b8 1423 'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
18ca61c5
RA
1424 'like_count': int_or_none(status.get('favorite_count')),
1425 'repost_count': int_or_none(status.get('retweet_count')),
1426 'comment_count': int_or_none(status.get('reply_count')),
1427 'age_limit': 18 if status.get('possibly_sensitive') else 0,
b6795fd3 1428 'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
f57f84f6 1429 }
cf5881fc 1430
30a074c2 1431 def extract_from_video_info(media):
a006ce2b 1432 media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
13b2ae29 1433 self.write_debug(f'Extracting from video info: {media_id}')
18ca61c5
RA
1434
1435 formats = []
4bed4363 1436 subtitles = {}
92315c03 1437 for variant in traverse_obj(media, ('video_info', 'variants', ...)):
4bed4363
F
1438 fmts, subs = self._extract_variant_formats(variant, twid)
1439 subtitles = self._merge_subtitles(subtitles, subs)
1440 formats.extend(fmts)
18ca61c5
RA
1441
1442 thumbnails = []
1443 media_url = media.get('media_url_https') or media.get('media_url')
1444 if media_url:
1445 def add_thumbnail(name, size):
1446 thumbnails.append({
1447 'id': name,
1448 'url': update_url_query(media_url, {'name': name}),
1449 'width': int_or_none(size.get('w') or size.get('width')),
1450 'height': int_or_none(size.get('h') or size.get('height')),
1451 })
1452 for name, size in media.get('sizes', {}).items():
1453 add_thumbnail(name, size)
1454 add_thumbnail('orig', media.get('original_info') or {})
cf5881fc 1455
13b2ae29 1456 return {
b03fa783 1457 'id': media_id,
18ca61c5 1458 'formats': formats,
4bed4363 1459 'subtitles': subtitles,
18ca61c5 1460 'thumbnails': thumbnails,
1c54a98e 1461 'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})), # No longer available
92315c03 1462 'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
e7d22348 1463 # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
1464 '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'), # http format codec is unknown
13b2ae29 1465 }
30a074c2 1466
13b2ae29
SS
1467 def extract_from_card_info(card):
1468 if not card:
1469 return
1470
1471 self.write_debug(f'Extracting from card info: {card.get("url")}')
1472 binding_values = card['binding_values']
1473
1474 def get_binding_value(k):
1475 o = binding_values.get(k) or {}
1476 return try_get(o, lambda x: x[x['type'].lower() + '_value'])
1477
1478 card_name = card['name'].split(':')[-1]
1479 if card_name == 'player':
7a26ce26 1480 yield {
13b2ae29
SS
1481 '_type': 'url',
1482 'url': get_binding_value('player_url'),
1483 }
1484 elif card_name == 'periscope_broadcast':
7a26ce26 1485 yield {
13b2ae29
SS
1486 '_type': 'url',
1487 'url': get_binding_value('url') or get_binding_value('player_url'),
1488 'ie_key': PeriscopeIE.ie_key(),
1489 }
1490 elif card_name == 'broadcast':
7a26ce26 1491 yield {
13b2ae29
SS
1492 '_type': 'url',
1493 'url': get_binding_value('broadcast_url'),
1494 'ie_key': TwitterBroadcastIE.ie_key(),
1495 }
7a26ce26
SS
1496 elif card_name == 'audiospace':
1497 yield {
1498 '_type': 'url',
1499 'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
1500 'ie_key': TwitterSpacesIE.ie_key(),
1501 }
13b2ae29 1502 elif card_name == 'summary':
7a26ce26 1503 yield {
18ca61c5 1504 '_type': 'url',
13b2ae29
SS
1505 'url': get_binding_value('card_url'),
1506 }
1507 elif card_name == 'unified_card':
7a26ce26
SS
1508 unified_card = self._parse_json(get_binding_value('unified_card'), twid)
1509 yield from map(extract_from_video_info, traverse_obj(
1510 unified_card, ('media_entities', ...), expected_type=dict))
13b2ae29
SS
1511 # amplify, promo_video_website, promo_video_convo, appplayer,
1512 # video_direct_message, poll2choice_video, poll3choice_video,
1513 # poll4choice_video, ...
1514 else:
1515 is_amplify = card_name == 'amplify'
1516 vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
1517 content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
1518 formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
13b2ae29
SS
1519
1520 thumbnails = []
1521 for suffix in ('_small', '', '_large', '_x_large', '_original'):
1522 image = get_binding_value('player_image' + suffix) or {}
1523 image_url = image.get('url')
1524 if not image_url or '/player-placeholder' in image_url:
1525 continue
1526 thumbnails.append({
1527 'id': suffix[1:] if suffix else 'medium',
1528 'url': image_url,
1529 'width': int_or_none(image.get('width')),
1530 'height': int_or_none(image.get('height')),
1531 })
1532
7a26ce26 1533 yield {
13b2ae29
SS
1534 'formats': formats,
1535 'subtitles': subtitles,
1536 'thumbnails': thumbnails,
1537 'duration': int_or_none(get_binding_value(
1538 'content_duration_seconds')),
1539 }
1540
b6795fd3 1541 videos = traverse_obj(status, (
b03fa783 1542 (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))
13b2ae29 1543
b6795fd3
SS
1544 if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
1545 selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
1546 else:
92315c03 1547 desired_obj = traverse_obj(status, (
1548 (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
b6795fd3
SS
1549 if not desired_obj:
1550 raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
1551 elif desired_obj.get('type') != 'video':
1552 raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)
1553
1554 # Restore original archive id and video index in title
1555 for index, entry in enumerate(videos, 1):
1556 if entry.get('id') != desired_obj.get('id'):
1557 continue
1558 if index == 1:
1559 info['_old_archive_ids'] = [make_archive_id(self, twid)]
1560 if len(videos) != 1:
1561 info['title'] += f' #{index}'
1562 break
1563
1564 return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1565
1566 entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
13b2ae29
SS
1567 if not entries:
1568 expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
1569 if not expanded_url or expanded_url == url:
147e62fc 1570 self.raise_no_formats('No video could be found in this tweet', expected=True)
1571 return info
13b2ae29
SS
1572
1573 return self.url_result(expanded_url, display_id=twid, **info)
1574
1575 entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]
1576
1577 if len(entries) == 1:
1578 return entries[0]
1579
1580 for index, entry in enumerate(entries, 1):
1581 entry['title'] += f' #{index}'
1582
1583 return self.playlist_result(entries, **info)
445d72b8
YCH
1584
1585
# Extractor for amp.twimg.com Amplify video pages
class TwitterAmplifyIE(TwitterBaseIE):
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's ``twitter:*`` meta tags.

        The VMAP URL is read from ``twitter:amplify:vmap``; the thumbnail and
        the image/player dimensions come from the matching meta tags.
        Returns an info dict with ``id``, ``title``, ``formats`` and
        ``thumbnails``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read (width, height) from the twitter:<target>:width/height meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list so a missing stream does not
        # surface as an IndexError here
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1641
1642
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` URLs.

    Broadcast metadata comes from the ``broadcasts/show.json`` API call and
    the playlist URL from ``live_video_stream/status/<media_key>``.
    """
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the broadcast at *url*.

        Raises ExtractorError when the API reports no data for the broadcast.
        Upcoming broadcasts are returned without a ``formats`` entry.
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        data = response['broadcasts'][broadcast_id]
        if not data:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(data, broadcast_id)
        # Values from the broadcast payload take precedence over the parsed ones
        info.update({
            'title': data.get('status') or info.get('title'),
            'uploader_id': data.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(
                data, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = data['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        playlist_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in playlist_url:
            self.raise_geo_restricted()

        # The format id is the first `type` query parameter of the playlist URL, if any
        query = compat_parse_qs(compat_urllib_parse_urlparse(playlist_url).query)
        format_id = query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(data)
        info['formats'] = self._extract_pscp_m3u8_formats(
            playlist_url, broadcast_id, format_id, state, width, height)
        return info
86b868c6
U
1718
1719
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` URLs (audio-only formats)."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased API `state` value -> live_status reported in the info dict
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for the Space lookup."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Return the info dict for the Space at *url*.

        Requires a logged-in session. Raises ExtractorError when the API
        returns no ``audioSpace`` data; the remaining "no formats" situations
        are reported through ``raise_no_formats``.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        space_data = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'
        media_key = metadata.get('media_key')

        headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif media_key:
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL among the two candidate keys of `source`
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}),
                get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            # Spaces are audio-only; ended ones additionally get a container hint
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1858
1859
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<id>`` pseudo-URLs) to their target."""
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'
    # Prefix of the interstitial warning URL that may wrap the real destination
    _UNSAFE_LINK_PREFIX = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='

    def _real_extract(self, url):
        """Request the short link and hand its final URL to ``url_result``.

        If the final URL is the "unsafe link" warning page, the wrapped
        destination is used instead.
        """
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # `tco:<id>` form: rebuild the real t.co URL from the bare id
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the 'curl' User-Agent presumably makes t.co answer with
        # a plain HTTP redirect - confirm before changing it.
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        if new_url.startswith(self._UNSAFE_LINK_PREFIX):
            # Strip only the leading prefix; `str.replace` would also remove any
            # later occurrence inside the destination URL itself.
            # NOTE(review): the remainder is passed on as-is - confirm whether
            # the `unsafe_link` value ever needs percent-decoding.
            new_url = new_url[len(self._UNSAFE_LINK_PREFIX):]
        return self.url_result(new_url)