]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[ie/twitter] Fix retweet extraction and syndication API (#8016)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
a006ce2b 2import random
23e7cba8
S
3import re
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
a006ce2b 7from ..compat import functools # isort: split
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
23e7cba8 13from ..utils import (
2edfd745 14 ExtractorError,
13b2ae29 15 dict_get,
92315c03 16 filter_dict,
23e7cba8 17 float_or_none,
13b2ae29 18 format_field,
cf5881fc 19 int_or_none,
13b2ae29 20 make_archive_id,
147e62fc 21 remove_end,
13b2ae29
SS
22 str_or_none,
23 strip_or_none,
f1150b9e 24 traverse_obj,
7a26ce26 25 try_call,
2edfd745 26 try_get,
18ca61c5
RA
27 unified_timestamp,
28 update_url_query,
41d1cca3 29 url_or_none,
2edfd745 30 xpath_text,
23e7cba8
S
31)
32
33
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors: format parsing, login flow and API calls."""

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header; see _set_base_headers for which one is picked
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api after every login step and echoed back in the next request
    _flow_token = None

    # Request body used to open the login flow (compact JSON, already encoded to bytes)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Return ``(formats, subtitles)`` for a single media variant mapping.

        A variant without a URL yields nothing, an HLS playlist is delegated to
        the m3u8 helper, and anything else becomes one direct HTTP format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # The bitrate may be stored under either key; a zero/missing value is normalised to None
            # (the 1000 is presumably the bit/s -> kbit/s scale of int_or_none -- confirm in utils)
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and return ``(formats, subtitles)`` for its variants."""
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is rewritten in place so the unquoted URL is what gets extracted below
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # Only add the MediaFile URL when present and not already covered by a variant
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` when the URL has a ``/<W>x<H>/`` path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """True when an ``auth_token`` cookie is available for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    @functools.cached_property
    def _selected_api(self):
        """First value of the ``api`` extractor argument (defaults to ``'graphql'``)."""
        return self._configuration_arg('api', ['graphql'], ie_key='Twitter')[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from the API; raise ExtractorError if none is returned."""
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            # Legacy headers are only considered when a display_id is given (login passes None)
            headers=self._set_base_headers(legacy=display_id and self._selected_api == 'legacy')),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Build the Authorization header, plus ``x-csrf-token`` if a ``ct0`` cookie value can be read.

        The legacy bearer token is only used for logged-out requests.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Send one step of the login flow and return the ID of the next subtask.

        ``query`` defaults to None (instead of a shared mutable ``{}``) and is
        treated as an empty mapping. Stores the returned flow token on the
        instance for the following step.

        Raises ExtractorError when the API reports an error, the status is not
        ``'success'`` or no subtask ID is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Walk through the login subtasks until the auth cookie is present.

        Does nothing when already logged in. Each iteration answers the subtask
        the API asked for last; unknown or terminal subtasks raise.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page; fall back to requesting one
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, so each step sends the latest token
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Reaching this inside the loop means the flow finished but the cookie is still missing
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or GraphQL API at ``path`` and return the decoded JSON.

        ``query`` defaults to None (instead of a shared mutable ``{}``) and is
        treated as an empty mapping. Logged-in requests send session headers;
        otherwise a fresh guest token is fetched for the request.

        Raises ExtractorError (or a login-required error for "not authorized"
        messages) when the response carries an ``errors`` entry.
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # De-duplicate while keeping the API's order so the message is stable between runs
            errors = ', '.join(dict.fromkeys(traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for ``media_id``; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the ``data`` member of the response."""
        data = self._build_graphql_query(media_id)
        # Every top-level value is sent as a compact JSON string in the query string
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
332
333
class TwitterCardIE(InfoExtractor):
    """Matches card/video embed URLs and hands the status ID over to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect to the equivalent ``/statuses/<id>`` URL, to be handled by TwitterIE."""
        status_id = self._match_id(url)
        # No extraction happens here; the card ID is reused as the status ID
        return self.url_result(
            'https://twitter.com/statuses/' + status_id,
            TwitterIE.ie_key(), status_id)
c8398a9b 458
03879ff0 459
18ca61c5 460class TwitterIE(TwitterBaseIE):
014e8803 461 IE_NAME = 'twitter'
b6795fd3 462 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 463
cf5881fc 464 _TESTS = [{
48aae2d2 465 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 466 'info_dict': {
13b2ae29
SS
467 'id': '643211870443208704',
468 'display_id': '643211948184596480',
f57f84f6 469 'ext': 'mp4',
575036b4 470 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 471 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 472 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
473 'uploader': 'FREE THE NIPPLE',
474 'uploader_id': 'freethenipple',
3b65a6fb 475 'duration': 12.922,
18ca61c5
RA
476 'timestamp': 1442188653,
477 'upload_date': '20150913',
13b2ae29 478 'uploader_url': 'https://twitter.com/freethenipple',
b03fa783 479 'comment_count': int,
480 'repost_count': int,
13b2ae29 481 'like_count': int,
b03fa783 482 'view_count': int,
13b2ae29
SS
483 'tags': [],
484 'age_limit': 18,
f57f84f6 485 },
cf5881fc
YCH
486 }, {
487 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
488 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
489 'info_dict': {
490 'id': '657991469417025536',
491 'ext': 'mp4',
492 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
493 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 494 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
495 'uploader': 'Gifs',
496 'uploader_id': 'giphz',
497 },
7efc1c2b 498 'expected_warnings': ['height', 'width'],
fc0a45fa 499 'skip': 'Account suspended',
b703ebee
JMF
500 }, {
501 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
502 'info_dict': {
503 'id': '665052190608723968',
13b2ae29 504 'display_id': '665052190608723968',
b703ebee 505 'ext': 'mp4',
b6795fd3 506 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 507 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 508 'uploader_id': 'starwars',
7a26ce26 509 'uploader': r're:Star Wars.*',
18ca61c5
RA
510 'timestamp': 1447395772,
511 'upload_date': '20151113',
13b2ae29 512 'uploader_url': 'https://twitter.com/starwars',
b03fa783 513 'comment_count': int,
514 'repost_count': int,
13b2ae29
SS
515 'like_count': int,
516 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
517 'age_limit': 0,
b703ebee 518 },
0ae937a7
YCH
519 }, {
520 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
521 'info_dict': {
522 'id': '705235433198714880',
523 'ext': 'mp4',
18ca61c5
RA
524 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
525 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
526 'uploader_id': 'BTNBrentYarina',
527 'uploader': 'Brent Yarina',
18ca61c5
RA
528 'timestamp': 1456976204,
529 'upload_date': '20160303',
13b2ae29
SS
530 'uploader_url': 'https://twitter.com/BTNBrentYarina',
531 'comment_count': int,
532 'repost_count': int,
533 'like_count': int,
534 'tags': [],
535 'age_limit': 0,
0ae937a7
YCH
536 },
537 'params': {
538 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
539 # Test case of TwitterCardIE
540 'skip_download': True,
541 },
352e7d98 542 'skip': 'Dead external link',
03879ff0
YCH
543 }, {
544 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 545 'info_dict': {
13b2ae29
SS
546 'id': '700207414000242688',
547 'display_id': '700207533655363584',
03879ff0 548 'ext': 'mp4',
13b2ae29 549 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 550 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 551 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
552 'uploader': 'jaydin donte geer',
553 'uploader_id': 'jaydingeer',
3b65a6fb 554 'duration': 30.0,
18ca61c5
RA
555 'timestamp': 1455777459,
556 'upload_date': '20160218',
13b2ae29 557 'uploader_url': 'https://twitter.com/jaydingeer',
b03fa783 558 'comment_count': int,
559 'repost_count': int,
13b2ae29 560 'like_count': int,
b03fa783 561 'view_count': int,
13b2ae29
SS
562 'tags': ['Damndaniel'],
563 'age_limit': 0,
03879ff0 564 },
395fd4b0
YCH
565 }, {
566 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
567 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
568 'info_dict': {
569 'id': 'MIOxnrUteUd',
570 'ext': 'mp4',
18ca61c5
RA
571 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
572 'uploader': 'TAKUMA',
573 'uploader_id': '1004126642786242560',
3615bfe1 574 'timestamp': 1402826626,
395fd4b0 575 'upload_date': '20140615',
13b2ae29
SS
576 'thumbnail': r're:^https?://.*\.jpg',
577 'alt_title': 'Vine by TAKUMA',
578 'comment_count': int,
579 'repost_count': int,
580 'like_count': int,
581 'view_count': int,
395fd4b0
YCH
582 },
583 'add_ie': ['Vine'],
36b7d9db
YCH
584 }, {
585 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 586 'info_dict': {
13b2ae29
SS
587 'id': '717462543795523584',
588 'display_id': '719944021058060289',
36b7d9db
YCH
589 'ext': 'mp4',
590 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
591 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
592 'uploader_id': 'CaptainAmerica',
36b7d9db 593 'uploader': 'Captain America',
3b65a6fb 594 'duration': 3.17,
18ca61c5
RA
595 'timestamp': 1460483005,
596 'upload_date': '20160412',
13b2ae29
SS
597 'uploader_url': 'https://twitter.com/CaptainAmerica',
598 'thumbnail': r're:^https?://.*\.jpg',
b03fa783 599 'comment_count': int,
600 'repost_count': int,
13b2ae29 601 'like_count': int,
b03fa783 602 'view_count': int,
13b2ae29
SS
603 'tags': [],
604 'age_limit': 0,
36b7d9db 605 },
f0bc5a86
YCH
606 }, {
607 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
608 'info_dict': {
609 'id': '1zqKVVlkqLaKB',
610 'ext': 'mp4',
18ca61c5 611 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 612 'upload_date': '20160923',
18ca61c5
RA
613 'uploader_id': '1PmKqpJdOJQoY',
614 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 615 'timestamp': 1474613214,
13b2ae29 616 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
617 },
618 'add_ie': ['Periscope'],
2edfd745
YCH
619 }, {
620 # has mp4 formats via mobile API
621 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
622 'info_dict': {
6014355c 623 'id': '852077943283097602',
2edfd745
YCH
624 'ext': 'mp4',
625 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 626 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
627 'uploader': 'عالم الأخبار',
628 'uploader_id': 'news_al3alm',
3b65a6fb 629 'duration': 277.4,
18ca61c5
RA
630 'timestamp': 1492000653,
631 'upload_date': '20170412',
6014355c 632 'display_id': '852138619213144067',
633 'age_limit': 0,
634 'uploader_url': 'https://twitter.com/news_al3alm',
635 'thumbnail': r're:^https?://.*\.jpg',
636 'tags': [],
637 'repost_count': int,
638 'view_count': int,
639 'like_count': int,
640 'comment_count': int,
2edfd745 641 },
5c1452e8
GF
642 }, {
643 'url': 'https://twitter.com/i/web/status/910031516746514432',
644 'info_dict': {
13b2ae29
SS
645 'id': '910030238373089285',
646 'display_id': '910031516746514432',
5c1452e8
GF
647 'ext': 'mp4',
648 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
649 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 650 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
651 'uploader': 'Préfet de Guadeloupe',
652 'uploader_id': 'Prefet971',
653 'duration': 47.48,
18ca61c5
RA
654 'timestamp': 1505803395,
655 'upload_date': '20170919',
13b2ae29 656 'uploader_url': 'https://twitter.com/Prefet971',
b03fa783 657 'comment_count': int,
658 'repost_count': int,
13b2ae29 659 'like_count': int,
b03fa783 660 'view_count': int,
13b2ae29
SS
661 'tags': ['Maria'],
662 'age_limit': 0,
5c1452e8
GF
663 },
664 'params': {
665 'skip_download': True, # requires ffmpeg
666 },
2593725a
S
667 }, {
668 # card via api.twitter.com/1.1/videos/tweet/config
669 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
670 'info_dict': {
13b2ae29
SS
671 'id': '1001551417340022785',
672 'display_id': '1001551623938805763',
2593725a
S
673 'ext': 'mp4',
674 'title': 're:.*?Shep is on a roll today.*?',
675 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 676 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
677 'uploader': 'Lis Power',
678 'uploader_id': 'LisPower1',
679 'duration': 111.278,
18ca61c5
RA
680 'timestamp': 1527623489,
681 'upload_date': '20180529',
13b2ae29 682 'uploader_url': 'https://twitter.com/LisPower1',
b03fa783 683 'comment_count': int,
684 'repost_count': int,
13b2ae29 685 'like_count': int,
b03fa783 686 'view_count': int,
13b2ae29
SS
687 'tags': [],
688 'age_limit': 0,
2593725a
S
689 },
690 'params': {
691 'skip_download': True, # requires ffmpeg
692 },
b7ef93f0
S
693 }, {
694 'url': 'https://twitter.com/foobar/status/1087791357756956680',
695 'info_dict': {
13b2ae29
SS
696 'id': '1087791272830607360',
697 'display_id': '1087791357756956680',
b7ef93f0 698 'ext': 'mp4',
6014355c 699 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
b7ef93f0 700 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 701 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
6014355c 702 'uploader': 'X',
703 'uploader_id': 'X',
b7ef93f0 704 'duration': 61.567,
18ca61c5
RA
705 'timestamp': 1548184644,
706 'upload_date': '20190122',
6014355c 707 'uploader_url': 'https://twitter.com/X',
b03fa783 708 'comment_count': int,
709 'repost_count': int,
13b2ae29 710 'like_count': int,
b03fa783 711 'view_count': int,
13b2ae29
SS
712 'tags': [],
713 'age_limit': 0,
18ca61c5 714 },
a006ce2b 715 'skip': 'This Tweet is unavailable',
18ca61c5
RA
716 }, {
717 # not available in Periscope
718 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
719 'info_dict': {
720 'id': '1vOGwqejwoWxB',
721 'ext': 'mp4',
722 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
723 'uploader': 'Vivi',
724 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
725 'thumbnail': r're:^https?://.*\.jpg',
726 'tags': ['EduTECH2019'],
727 'view_count': int,
b7ef93f0 728 },
18ca61c5 729 'add_ie': ['TwitterBroadcast'],
a006ce2b 730 'skip': 'Broadcast no longer exists',
30a074c2 731 }, {
732 # unified card
733 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
734 'info_dict': {
13b2ae29
SS
735 'id': '1349774757969989634',
736 'display_id': '1349794411333394432',
30a074c2 737 'ext': 'mp4',
738 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
739 'thumbnail': r're:^https?://.*\.jpg',
740 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
741 'uploader': 'Brooklyn Nets',
742 'uploader_id': 'BrooklynNets',
743 'duration': 324.484,
744 'timestamp': 1610651040,
745 'upload_date': '20210114',
13b2ae29 746 'uploader_url': 'https://twitter.com/BrooklynNets',
b03fa783 747 'comment_count': int,
748 'repost_count': int,
13b2ae29
SS
749 'like_count': int,
750 'tags': [],
751 'age_limit': 0,
30a074c2 752 },
753 'params': {
754 'skip_download': True,
755 },
13b2ae29
SS
756 }, {
757 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
758 'info_dict': {
759 'id': '1577855447914409984',
760 'display_id': '1577855540407197696',
761 'ext': 'mp4',
352e7d98 762 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
763 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 764 'upload_date': '20221006',
352e7d98 765 'uploader': 'oshtru',
13b2ae29
SS
766 'uploader_id': 'oshtru',
767 'uploader_url': 'https://twitter.com/oshtru',
768 'thumbnail': r're:^https?://.*\.jpg',
769 'duration': 30.03,
7a26ce26 770 'timestamp': 1665025050,
b03fa783 771 'comment_count': int,
772 'repost_count': int,
13b2ae29 773 'like_count': int,
b03fa783 774 'view_count': int,
13b2ae29
SS
775 'tags': [],
776 'age_limit': 0,
777 },
778 'params': {'skip_download': True},
779 }, {
780 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
781 'info_dict': {
782 'id': '1577719286659006464',
a006ce2b 783 'title': 'Ultima📛| New Era - Test',
13b2ae29 784 'description': 'Test https://t.co/Y3KEZD7Dad',
a006ce2b 785 'uploader': 'Ultima📛| New Era',
13b2ae29
SS
786 'uploader_id': 'UltimaShadowX',
787 'uploader_url': 'https://twitter.com/UltimaShadowX',
788 'upload_date': '20221005',
7a26ce26 789 'timestamp': 1664992565,
b03fa783 790 'comment_count': int,
791 'repost_count': int,
13b2ae29
SS
792 'like_count': int,
793 'tags': [],
794 'age_limit': 0,
795 },
796 'playlist_count': 4,
797 'params': {'skip_download': True},
7a26ce26
SS
798 }, {
799 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
800 'info_dict': {
801 'id': '1575559336759263233',
802 'display_id': '1575560063510810624',
803 'ext': 'mp4',
804 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
805 'thumbnail': r're:^https?://.*\.jpg',
806 'description': 'md5:95aea692fda36a12081b9629b02daa92',
807 'uploader': 'Max Olson',
808 'uploader_id': 'MesoMax919',
809 'uploader_url': 'https://twitter.com/MesoMax919',
810 'duration': 21.321,
811 'timestamp': 1664477766,
812 'upload_date': '20220929',
b03fa783 813 'comment_count': int,
814 'repost_count': int,
7a26ce26 815 'like_count': int,
b03fa783 816 'view_count': int,
7a26ce26
SS
817 'tags': ['HurricaneIan'],
818 'age_limit': 0,
819 },
820 }, {
a006ce2b 821 # Adult content, fails if not logged in
7a26ce26
SS
822 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
823 'info_dict': {
824 'id': '1575199163847000068',
825 'display_id': '1575199173472927762',
826 'ext': 'mp4',
827 'title': str,
828 'description': str,
829 'uploader': str,
830 'uploader_id': 'Rizdraws',
831 'uploader_url': 'https://twitter.com/Rizdraws',
832 'upload_date': '20220928',
833 'timestamp': 1664391723,
16bed382 834 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
835 'like_count': int,
836 'repost_count': int,
837 'comment_count': int,
838 'age_limit': 18,
839 'tags': []
840 },
a006ce2b 841 'params': {'skip_download': 'The media could not be played'},
147e62fc 842 'skip': 'Requires authentication',
7a26ce26 843 }, {
a006ce2b 844 # Playlist result only with graphql API
7a26ce26
SS
845 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
846 'playlist_mincount': 2,
847 'info_dict': {
848 'id': '1395079556562706435',
849 'title': str,
850 'tags': [],
851 'uploader': str,
852 'like_count': int,
853 'upload_date': '20210519',
854 'age_limit': 0,
855 'repost_count': int,
147e62fc 856 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
857 'uploader_id': 'Srirachachau',
858 'comment_count': int,
859 'uploader_url': 'https://twitter.com/Srirachachau',
860 'timestamp': 1621447860,
861 },
862 }, {
7a26ce26
SS
863 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
864 'playlist_mincount': 2,
865 'info_dict': {
866 'id': '1578353380363501568',
867 'title': str,
868 'uploader_id': 'DavidToons_',
869 'repost_count': int,
870 'like_count': int,
871 'uploader': str,
872 'timestamp': 1665143744,
873 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 874 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
875 'tags': [],
876 'comment_count': int,
877 'upload_date': '20221007',
878 'age_limit': 0,
879 },
880 }, {
881 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
882 'playlist_count': 2,
883 'info_dict': {
884 'id': '1578401165338976258',
885 'title': str,
886 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
887 'uploader': str,
888 'uploader_id': 'primevideouk',
889 'timestamp': 1665155137,
890 'upload_date': '20221007',
891 'age_limit': 0,
892 'uploader_url': 'https://twitter.com/primevideouk',
b03fa783 893 'comment_count': int,
894 'repost_count': int,
7a26ce26
SS
895 'like_count': int,
896 'tags': ['TheRingsOfPower'],
897 },
898 }, {
899 # Twitter Spaces
900 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
901 'info_dict': {
902 'id': '1lPJqmBeeNAJb',
903 'ext': 'm4a',
904 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
905 'uploader': r're:Monique Camarra.+?',
906 'uploader_id': 'MoniqueCamarra',
907 'live_status': 'was_live',
1c16d9df 908 'release_timestamp': 1658417414,
a006ce2b 909 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
1cffd621 910 'timestamp': 1658407771,
911 'release_date': '20220721',
912 'upload_date': '20220721',
7a26ce26
SS
913 },
914 'add_ie': ['TwitterSpaces'],
915 'params': {'skip_download': 'm3u8'},
92315c03 916 'skip': 'Requires authentication',
16bed382 917 }, {
918 # URL specifies video number but --yes-playlist
919 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
920 'playlist_mincount': 2,
921 'info_dict': {
922 'id': '1600649710662213632',
923 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
924 'timestamp': 1670459604.0,
925 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
b03fa783 926 'comment_count': int,
16bed382 927 'uploader_id': 'CTVJLaidlaw',
b03fa783 928 'repost_count': int,
16bed382 929 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
930 'upload_date': '20221208',
931 'age_limit': 0,
932 'uploader': 'Jocelyn Laidlaw',
933 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
934 'like_count': int,
935 },
936 }, {
937 # URL specifies video number and --no-playlist
938 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
939 'info_dict': {
940 'id': '1600649511827013632',
941 'ext': 'mp4',
147e62fc 942 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 943 'thumbnail': r're:^https?://.+\.jpg',
944 'timestamp': 1670459604.0,
945 'uploader_id': 'CTVJLaidlaw',
946 'uploader': 'Jocelyn Laidlaw',
b03fa783 947 'repost_count': int,
948 'comment_count': int,
16bed382 949 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
950 'duration': 102.226,
951 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
952 'display_id': '1600649710662213632',
953 'like_count': int,
b03fa783 954 'view_count': int,
16bed382 955 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
956 'upload_date': '20221208',
957 'age_limit': 0,
958 },
959 'params': {'noplaylist': True},
7543c9c9 960 }, {
961 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
962 # note the id different between extraction and url
963 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
964 'info_dict': {
965 'id': '1621117577354424321',
966 'display_id': '1621117700482416640',
967 'ext': 'mp4',
968 'title': '뽀 - 아 최우제 이동속도 봐',
969 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
970 'duration': 24.598,
971 'uploader': '뽀',
972 'uploader_id': 's2FAKER',
973 'uploader_url': 'https://twitter.com/s2FAKER',
974 'upload_date': '20230202',
975 'timestamp': 1675339553.0,
976 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
977 'age_limit': 18,
978 'tags': [],
979 'like_count': int,
b03fa783 980 'repost_count': int,
981 'comment_count': int,
982 'view_count': int,
7543c9c9 983 },
b6795fd3
SS
984 }, {
985 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
986 'info_dict': {
987 'id': '1599108643743473680',
988 'display_id': '1599108751385972737',
989 'ext': 'mp4',
990 'title': '\u06ea - \U0001F48B',
991 'uploader_url': 'https://twitter.com/hlo_again',
992 'like_count': int,
993 'uploader_id': 'hlo_again',
994 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b03fa783 995 'repost_count': int,
b6795fd3 996 'duration': 9.531,
b03fa783 997 'comment_count': int,
998 'view_count': int,
b6795fd3
SS
999 'upload_date': '20221203',
1000 'age_limit': 0,
1001 'timestamp': 1670092210.0,
1002 'tags': [],
1003 'uploader': '\u06ea',
1004 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1005 },
1006 'params': {'noplaylist': True},
1007 }, {
b6795fd3
SS
1008 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1009 'info_dict': {
1010 'id': '1600009362759733248',
1011 'display_id': '1600009574919962625',
1012 'ext': 'mp4',
1013 'uploader_url': 'https://twitter.com/MunTheShinobi',
1014 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b03fa783 1015 'view_count': int,
b6795fd3
SS
1016 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1017 'age_limit': 0,
a006ce2b 1018 'uploader': 'Mün',
b03fa783 1019 'repost_count': int,
b6795fd3 1020 'upload_date': '20221206',
a006ce2b 1021 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b03fa783 1022 'comment_count': int,
b6795fd3
SS
1023 'like_count': int,
1024 'tags': [],
1025 'uploader_id': 'MunTheShinobi',
1026 'duration': 139.987,
1027 'timestamp': 1670306984.0,
1028 },
cf605226 1029 }, {
a006ce2b 1030 # retweeted_status (private)
cf605226 1031 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1032 'info_dict': {
1033 'id': '1623274794488659969',
1034 'display_id': '1623739803874349067',
1035 'ext': 'mp4',
1036 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
92315c03 1037 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
cf605226 1038 'uploader': 'Johnny Bullets',
1039 'uploader_id': 'Johnnybull3ts',
1040 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1041 'age_limit': 0,
1042 'tags': [],
1043 'duration': 8.033,
1044 'timestamp': 1675853859.0,
1045 'upload_date': '20230208',
1046 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1047 'like_count': int,
b03fa783 1048 'repost_count': int,
cf605226 1049 },
6014355c 1050 'skip': 'Protected tweet',
92315c03 1051 }, {
a006ce2b 1052 # retweeted_status
1053 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
92315c03 1054 'info_dict': {
a006ce2b 1055 'id': '1694928337846538240',
92315c03 1056 'ext': 'mp4',
a006ce2b 1057 'display_id': '1695424220702888009',
1058 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1059 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1060 'uploader': 'Benny Johnson',
1061 'uploader_id': 'bennyjohnson',
1062 'uploader_url': 'https://twitter.com/bennyjohnson',
92315c03 1063 'age_limit': 0,
1064 'tags': [],
a006ce2b 1065 'duration': 45.001,
1066 'timestamp': 1692962814.0,
1067 'upload_date': '20230825',
1068 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
92315c03 1069 'like_count': int,
92315c03 1070 'repost_count': int,
a006ce2b 1071 'view_count': int,
92315c03 1072 'comment_count': int,
1073 },
a006ce2b 1074 }, {
1075 # retweeted_status w/ legacy API
1076 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1077 'info_dict': {
1078 'id': '1694928337846538240',
1079 'ext': 'mp4',
1080 'display_id': '1695424220702888009',
1081 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1082 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1083 'uploader': 'Benny Johnson',
1084 'uploader_id': 'bennyjohnson',
1085 'uploader_url': 'https://twitter.com/bennyjohnson',
1086 'age_limit': 0,
1087 'tags': [],
1088 'duration': 45.001,
1089 'timestamp': 1692962814.0,
1090 'upload_date': '20230825',
1091 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1092 'like_count': int,
1093 'repost_count': int,
1094 },
1095 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1096 }, {
1097 # Broadcast embedded in tweet
1098 'url': 'https://twitter.com/JessicaDobsonWX/status/1693057346933600402',
1099 'info_dict': {
1100 'id': '1yNGaNLjEblJj',
1101 'ext': 'mp4',
1102 'title': 'Jessica Dobson - WAVE Weather Now - Saturday 8/19/23 Update',
1103 'uploader': 'Jessica Dobson',
1104 'uploader_id': '1DZEoDwDovRQa',
1105 'thumbnail': r're:^https?://.*\.jpg',
1106 'view_count': int,
1107 },
1108 'add_ie': ['TwitterBroadcast'],
1109 }, {
1110 # Animated gif and quote tweet video, with syndication API
1111 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1112 'playlist_mincount': 2,
1113 'info_dict': {
1114 'id': '1696256659889565950',
1115 'title': 'BAKOON - https://t.co/zom968d0a0',
1116 'description': 'https://t.co/zom968d0a0',
1117 'tags': [],
1118 'uploader': 'BAKOON',
1119 'uploader_id': 'BAKKOOONN',
1120 'uploader_url': 'https://twitter.com/BAKKOOONN',
1121 'age_limit': 18,
1122 'timestamp': 1693254077.0,
1123 'upload_date': '20230828',
1124 'like_count': int,
1125 },
1126 'params': {'extractor_args': {'twitter': {'api': ['syndication']}}},
1127 'expected_warnings': ['Not all metadata'],
82fb2357 1128 }, {
1129 # onion route
1130 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1131 'only_matching': True,
18ca61c5
RA
1132 }, {
1133 # Twitch Clip Embed
1134 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1135 'only_matching': True,
10a5091e
RA
1136 }, {
1137 # promo_video_website card
1138 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1139 'only_matching': True,
00dd0cd5 1140 }, {
1141 # promo_video_convo card
1142 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1143 'only_matching': True,
1144 }, {
1145 # appplayer card
1146 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1147 'only_matching': True,
30a074c2 1148 }, {
1149 # video_direct_message card
1150 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1151 'only_matching': True,
1152 }, {
1153 # poll2choice_video card
1154 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1155 'only_matching': True,
1156 }, {
1157 # poll3choice_video card
1158 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1159 'only_matching': True,
1160 }, {
1161 # poll4choice_video card
1162 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1163 'only_matching': True,
cf5881fc 1164 }]
f57f84f6 1165
# Captures the run of digits between '_video/' and the following '/' in a URL
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
1167
1168 @property
1169 def _GRAPHQL_ENDPOINT(self):
1170 if self.is_logged_in:
1171 return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
1172 return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
1173
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL tweet response into a legacy-style status dict.

    data is the GraphQL payload: when logged in, the tweet is looked up in the
    threaded conversation entries by its 'tweet-<twid>' entry id; otherwise it
    is read from 'tweetResult' -> 'result'.

    Returns the tweet's 'legacy' dict, augmented with 'user', 'card',
    'quoted_status' and 'retweeted_status' where present. Returns {} when no
    tweet result could be located.

    Raises ExtractorError when the result is a tombstone or is reported as
    unavailable (login is requested for NSFW and protected tweets).
    """
    result = traverse_obj(data, (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None), {dict},
    ), default={}, get_all=False) if self.is_logged_in else traverse_obj(
        data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
    elif typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
    elif typename == 'TweetWithVisibilityResults':
        # The logged-in lookup above already unwraps the inner tweet via ('tweet', None);
        # the logged-out lookup does not, so unwrap the actual Tweet here
        result = traverse_obj(result, ('tweet', {dict})) or {}

    # 'legacy' may be absent or null; fall back to an empty dict rather than crashing on update()
    status = traverse_obj(result, ('legacy', {dict})) or {}
    status.update(traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={}))

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        status['retweeted_status']['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Flatten the card's list of {'key': ..., 'value': ...} items into a key -> value mapping
    binding_values = {
        binding_value.get('key'): binding_value.get('value')
        for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1217
1218 def _build_graphql_query(self, media_id):
1219 return {
1220 'variables': {
1221 'focalTweetId': media_id,
1222 'includePromotedContent': True,
1223 'with_rux_injections': False,
1224 'withBirdwatchNotes': True,
1225 'withCommunity': True,
1226 'withDownvotePerspective': False,
1227 'withQuickPromoteEligibilityTweetFields': True,
1228 'withReactionsMetadata': False,
1229 'withReactionsPerspective': False,
1230 'withSuperFollowsTweetFields': True,
1231 'withSuperFollowsUserFields': True,
1232 'withV2Timeline': True,
1233 'withVoice': True,
1234 },
1235 'features': {
1236 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1237 'interactive_text_enabled': True,
1238 'responsive_web_edit_tweet_api_enabled': True,
1239 'responsive_web_enhance_cards_enabled': True,
1240 'responsive_web_graphql_timeline_navigation_enabled': False,
1241 'responsive_web_text_conversations_enabled': False,
1242 'responsive_web_uc_gql_enabled': True,
1243 'standardized_nudges_misinfo': True,
1244 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1245 'tweetypie_unmention_optimization_enabled': True,
1246 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1247 'verified_phone_label_enabled': False,
1248 'vibe_api_enabled': True,
1249 },
92315c03 1250 } if self.is_logged_in else {
1251 'variables': {
1252 'tweetId': media_id,
1253 'withCommunity': False,
1254 'includePromotedContent': False,
1255 'withVoice': False,
1256 },
1257 'features': {
1258 'creator_subscriptions_tweet_preview_api_enabled': True,
1259 'tweetypie_unmention_optimization_enabled': True,
1260 'responsive_web_edit_tweet_api_enabled': True,
1261 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1262 'view_counts_everywhere_api_enabled': True,
1263 'longform_notetweets_consumption_enabled': True,
1264 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1265 'tweet_awards_web_tipping_enabled': False,
1266 'freedom_of_speech_not_reach_fetch_enabled': True,
1267 'standardized_nudges_misinfo': True,
1268 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1269 'longform_notetweets_rich_text_read_enabled': True,
1270 'longform_notetweets_inline_media_enabled': True,
1271 'responsive_web_graphql_exclude_directive_enabled': True,
1272 'verified_phone_label_enabled': False,
1273 'responsive_web_media_download_video_enabled': False,
1274 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1275 'responsive_web_graphql_timeline_navigation_enabled': True,
1276 'responsive_web_enhance_cards_enabled': False
1277 },
1278 'fieldToggles': {
1279 'withArticleRichContentState': False
1280 }
7a26ce26
SS
1281 }
1282
def _extract_status(self, twid):
    """Fetch the status dict for a tweet through the applicable API.

    GraphQL is used when logged in or when selected; otherwise the 'legacy'
    or 'syndication' API is used as selected. If the status carries a
    'retweeted_status' dict, that dict is returned instead of the outer one.

    Raises ExtractorError for an empty syndication response or an unknown
    API selection.
    """
    if self.is_logged_in or self._selected_api == 'graphql':
        graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
        status = self._graphql_to_legacy(graphql_data, twid)

    elif self._selected_api == 'legacy':
        legacy_query = {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }
        status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)

    elif self._selected_api == 'syndication':
        self.report_warning(
            'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
        # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
        token = ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10))
        status = self._download_json(
            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
            headers={'User-Agent': 'Googlebot'}, query={'id': twid, 'token': token})
        if not status:
            raise ExtractorError('Syndication endpoint returned empty JSON response')

        # Reshape the response so it looks like a legacy/graphql status:
        # media of the tweet and of its quoted tweet end up under extended_entities
        media_details = []
        for item in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
            # Take the media ID from the first matching variant URL; fall back to the tweet ID
            variant_media_id = traverse_obj(item, (
                'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
            item['id_str'] = variant_media_id or twid
            media_details.append(item)
        status['extended_entities'] = {'media': media_details}

    else:
        raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
6014355c 1320
def _real_extract(self, url):
    """Extract the media attached to (or linked from) a tweet.

    Depending on what the tweet contains, returns one of:
      - a single info dict, when exactly one entry is found or when a specific
        media index from the URL is honoured,
      - a playlist result, when several entries are found,
      - a URL result pointing at the tweet's first expanded URL, when the
        tweet itself carries no media or card entries.

    Raises ExtractorError when the URL-specified media index does not exist
    or does not refer to a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Tweet-level metadata shared by every entry produced below
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-media part of an info dict from a media entity"""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries (URL results or format dicts) described by a tweet card"""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value', keyed by its lowercased 'type'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # All non-photo media of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The URL index is 1-based. A non-positive value must not become a negative
        # list index, which would silently wrap around to the last media item
        media_index = int(selected_index) - 1
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}),
            get_all=False) if media_index >= 0 else None
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media or card entries: fall back to the first expanded URL in the tweet, if any
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1503
1504
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for amp.twimg.com video pages.

    Formats come from the VMAP URL advertised in the page's
    'twitter:amplify:vmap' meta tag.
    """
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page and build the info dict from its meta tags"""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Reads the 'twitter:<target>:width' / ':height' meta tags; either may be None
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list (previously an IndexError) and only
        # apply dimensions that were actually found, so that existing values on
        # the format are not overwritten with None
        if formats:
            player_size = {'width': video_w, 'height': video_h}
            formats[0].update({k: v for k, v in player_size.items() if v is not None})

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1560
1561
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> URLs"""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream source and extract m3u8 formats"""
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        # A falsy entry under the requested ID is reported as a removed broadcast
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's 'type' query parameter (if any) is used as the format ID
        query_params = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = query_params.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1600
1601
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for Twitter Spaces (audio rooms) addressed by their 13-character ID.
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased Space `state` value -> `live_status` value reported in the result
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the `variables`/`features` payload describing the Space `space_id`."""
        # NOTE(review): presumably consumed by the base class' GraphQL helper - confirm
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Look up the Space named in `url` and return its info dict (audio-only formats)."""
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')

        audio_space = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not audio_space:
            raise ExtractorError('Twitter Space not found', expected=True)

        meta = audio_space['metadata']
        # An absent or unrecognised state leaves live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[meta['state'].lower()])
        is_live = live_status == 'is_live'
        media_key = meta.get('media_key')

        http_headers = {'Referer': 'https://twitter.com/'}
        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or meta.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif media_key:
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            # First valid URL wins; 'noRedirectPlaybackUrl' is tried before 'location'
            playlist_url = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if playlist_url:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    playlist_url, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=http_headers, fatal=False)
            for audio_format in formats:
                audio_format['vcodec'] = 'none'
                audio_format['acodec'] = 'aac'
                if not is_live:
                    audio_format['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            audio_space, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': meta.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(meta, (*creator_path, 'name')),
            'uploader_id': traverse_obj(meta, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # Both timestamps are divided by 1000 (scale) before being reported
            'release_timestamp': try_call(
                lambda: int_or_none(meta['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(meta.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': http_headers,
        }
1740
1741
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or the internal `tco:<code>` form) to their target URL.
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" is matched
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link's redirect and delegate extraction to the final URL.

        `url` is either a full t.co link or `tco:<code>`; in the latter case the
        t.co URL is rebuilt from `_BASE_URL` before it is requested.
        Returns a `url_result` for the redirect target.
        """
        mobj = self._match_valid_url(url)
        eid, video_id = mobj.group('eid', 'id')
        if eid:
            video_id = eid
            url = self._BASE_URL + video_id

        # The request is sent with a curl User-Agent and the post-redirect URL is read back
        new_url = self._request_webpage(url, video_id, headers={'User-Agent': 'curl'}).url

        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading warning prefix; a later occurrence of the same
            # text inside the target URL must be left untouched
            # NOTE(review): the remainder is passed on as-is - confirm whether the
            # `unsafe_link` value can arrive percent-encoded
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)