]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
f86216f8fff50819b8e3f9502f04ec47e0a12d23
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import functools
2 import json
3 import re
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import (
8 compat_parse_qs,
9 compat_urllib_parse_unquote,
10 compat_urllib_parse_urlparse,
11 )
12 from ..utils import (
13 ExtractorError,
14 dict_get,
15 filter_dict,
16 float_or_none,
17 format_field,
18 int_or_none,
19 make_archive_id,
20 remove_end,
21 str_or_none,
22 strip_or_none,
23 traverse_obj,
24 try_call,
25 try_get,
26 unified_timestamp,
27 update_url_query,
28 url_or_none,
29 xpath_text,
30 )
31
32
class TwitterBaseIE(InfoExtractor):
    """Common helpers for the Twitter extractors.

    Provides format extraction for media variants and VMAP documents, the
    username/password login flow, and authenticated or guest access to the
    legacy (1.1) and GraphQL API endpoints.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens sent in the Authorization header; see _set_base_headers()
    # for when the legacy one is chosen.
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Flow token of the login flow in progress; refreshed by _call_login_api()
    _flow_token = None

    # Request body that starts the login flow (sent with flow_name=login)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Return ``(formats, subtitles)`` for a single media variant.

        ``variant`` is a mapping with a ``url`` entry and optionally a
        ``bitrate``/``bit_rate`` entry. A variant without a URL yields
        ``([], {})``; a URL containing ``.m3u8`` is expanded as an HLS
        manifest; anything else becomes one direct HTTP format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # Bitrate is scaled down by 1000; a zero/missing value becomes None
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and return ``(formats, subtitles)``.

        Every ``videoVariant`` element is extracted; the ``MediaFile`` URL is
        added as well unless it was already listed among the variants.
        Returns ``([], {})`` when ``vmap_url`` is not a valid URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded in the document
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # Skip the fallback when there is no MediaFile at all; extracting a
        # missing URL can only ever produce an empty result
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Fill ``width``/``height`` of ``a_format`` in place from a ``/<W>x<H>/`` URL segment, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an ``auth_token`` cookie is present for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _fetch_guest_token(self, display_id):
        """Request a guest token from the API and return it as a string.

        Raises ExtractorError if the response carries no string ``guest_token``.
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            # Legacy headers are only considered when a display_id is given
            headers=self._set_base_headers(legacy=display_id and self._configuration_arg('legacy_api'))),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Build the base request headers.

        The legacy bearer token is used only when ``legacy`` is truthy and no
        login session exists. ``x-csrf-token`` is taken from the ``ct0``
        cookie and left out when that cookie is unavailable.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Send one step of the login flow and return the next subtask ID.

        Stores the ``flow_token`` of the response on the instance for the
        following step.

        Raises ExtractorError when the API reports an error message, when the
        response status is not ``success``, or when no subtask is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            # HTTP 400 is passed as expected so the body can be inspected below
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Walk through the login flow until an auth cookie is present.

        Does nothing when already logged in. Each iteration answers the
        subtask most recently returned by the API; subtasks that cannot be
        answered (captcha, rejected login, unknown subtask) raise.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page; otherwise request one
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, so it always carries the
            # token stored by the previous _call_login_api() call
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by the subtasks answered with free text
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # The loop condition already showed that no auth cookie is set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or GraphQL API and return the decoded JSON.

        Session headers are sent when logged in; otherwise a fresh guest
        token is fetched for the request.

        Raises a login-required error when the API reports a
        "not authorized" message, and ExtractorError for any other error
        reported in the response.
        """
        headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api'))
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        # These statuses are passed as expected so the body can be checked for errors
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # dict.fromkeys() removes duplicates while keeping the order in
            # which the API reported them, so the message is reproducible
            errors = ', '.join(dict.fromkeys(
                traverse_obj(result, ('errors', ..., 'message', {str}))))
            if errors and 'not authorized' in errors:
                self.raise_login_required(remove_end(errors, '.'))
            raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query mapping for ``media_id``; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the ``data`` member of the response.

        Each value of the mapping from _build_graphql_query() is serialised
        to compact JSON before being sent as a query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
327
328
class TwitterCardIE(InfoExtractor):
    """Extractor for card and embedded-video URLs; hands the tweet over to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'}
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a URL result pointing TwitterIE at the matching status page."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
453
454
455 class TwitterIE(TwitterBaseIE):
456 IE_NAME = 'twitter'
457 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
458
459 _TESTS = [{
460 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
461 'info_dict': {
462 'id': '643211870443208704',
463 'display_id': '643211948184596480',
464 'ext': 'mp4',
465 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
466 'thumbnail': r're:^https?://.*\.jpg',
467 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
468 'uploader': 'FREE THE NIPPLE',
469 'uploader_id': 'freethenipple',
470 'duration': 12.922,
471 'timestamp': 1442188653,
472 'upload_date': '20150913',
473 'uploader_url': 'https://twitter.com/freethenipple',
474 'comment_count': int,
475 'repost_count': int,
476 'like_count': int,
477 'view_count': int,
478 'tags': [],
479 'age_limit': 18,
480 },
481 }, {
482 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
483 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
484 'info_dict': {
485 'id': '657991469417025536',
486 'ext': 'mp4',
487 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
488 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
489 'thumbnail': r're:^https?://.*\.png',
490 'uploader': 'Gifs',
491 'uploader_id': 'giphz',
492 },
493 'expected_warnings': ['height', 'width'],
494 'skip': 'Account suspended',
495 }, {
496 'url': 'https://twitter.com/starwars/status/665052190608723968',
497 'info_dict': {
498 'id': '665052190608723968',
499 'display_id': '665052190608723968',
500 'ext': 'mp4',
501 'title': r're:Star Wars.*A new beginning is coming December 18.*',
502 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
503 'uploader_id': 'starwars',
504 'uploader': r're:Star Wars.*',
505 'timestamp': 1447395772,
506 'upload_date': '20151113',
507 'uploader_url': 'https://twitter.com/starwars',
508 'comment_count': int,
509 'repost_count': int,
510 'like_count': int,
511 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
512 'age_limit': 0,
513 },
514 }, {
515 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
516 'info_dict': {
517 'id': '705235433198714880',
518 'ext': 'mp4',
519 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
520 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
521 'uploader_id': 'BTNBrentYarina',
522 'uploader': 'Brent Yarina',
523 'timestamp': 1456976204,
524 'upload_date': '20160303',
525 'uploader_url': 'https://twitter.com/BTNBrentYarina',
526 'comment_count': int,
527 'repost_count': int,
528 'like_count': int,
529 'tags': [],
530 'age_limit': 0,
531 },
532 'params': {
533 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
534 # Test case of TwitterCardIE
535 'skip_download': True,
536 },
537 'skip': 'Dead external link',
538 }, {
539 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
540 'info_dict': {
541 'id': '700207414000242688',
542 'display_id': '700207533655363584',
543 'ext': 'mp4',
544 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
545 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
546 'thumbnail': r're:^https?://.*\.jpg',
547 'uploader': 'jaydin donte geer',
548 'uploader_id': 'jaydingeer',
549 'duration': 30.0,
550 'timestamp': 1455777459,
551 'upload_date': '20160218',
552 'uploader_url': 'https://twitter.com/jaydingeer',
553 'comment_count': int,
554 'repost_count': int,
555 'like_count': int,
556 'view_count': int,
557 'tags': ['Damndaniel'],
558 'age_limit': 0,
559 },
560 }, {
561 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
562 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
563 'info_dict': {
564 'id': 'MIOxnrUteUd',
565 'ext': 'mp4',
566 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
567 'uploader': 'TAKUMA',
568 'uploader_id': '1004126642786242560',
569 'timestamp': 1402826626,
570 'upload_date': '20140615',
571 'thumbnail': r're:^https?://.*\.jpg',
572 'alt_title': 'Vine by TAKUMA',
573 'comment_count': int,
574 'repost_count': int,
575 'like_count': int,
576 'view_count': int,
577 },
578 'add_ie': ['Vine'],
579 }, {
580 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
581 'info_dict': {
582 'id': '717462543795523584',
583 'display_id': '719944021058060289',
584 'ext': 'mp4',
585 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
586 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
587 'uploader_id': 'CaptainAmerica',
588 'uploader': 'Captain America',
589 'duration': 3.17,
590 'timestamp': 1460483005,
591 'upload_date': '20160412',
592 'uploader_url': 'https://twitter.com/CaptainAmerica',
593 'thumbnail': r're:^https?://.*\.jpg',
594 'comment_count': int,
595 'repost_count': int,
596 'like_count': int,
597 'view_count': int,
598 'tags': [],
599 'age_limit': 0,
600 },
601 }, {
602 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
603 'info_dict': {
604 'id': '1zqKVVlkqLaKB',
605 'ext': 'mp4',
606 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
607 'upload_date': '20160923',
608 'uploader_id': '1PmKqpJdOJQoY',
609 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
610 'timestamp': 1474613214,
611 'thumbnail': r're:^https?://.*\.jpg',
612 },
613 'add_ie': ['Periscope'],
614 }, {
615 # has mp4 formats via mobile API
616 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
617 'info_dict': {
618 'id': '852077943283097602',
619 'ext': 'mp4',
620 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
621 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
622 'uploader': 'عالم الأخبار',
623 'uploader_id': 'news_al3alm',
624 'duration': 277.4,
625 'timestamp': 1492000653,
626 'upload_date': '20170412',
627 'display_id': '852138619213144067',
628 'age_limit': 0,
629 'uploader_url': 'https://twitter.com/news_al3alm',
630 'thumbnail': r're:^https?://.*\.jpg',
631 'tags': [],
632 'repost_count': int,
633 'view_count': int,
634 'like_count': int,
635 'comment_count': int,
636 },
637 }, {
638 'url': 'https://twitter.com/i/web/status/910031516746514432',
639 'info_dict': {
640 'id': '910030238373089285',
641 'display_id': '910031516746514432',
642 'ext': 'mp4',
643 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
644 'thumbnail': r're:^https?://.*\.jpg',
645 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
646 'uploader': 'Préfet de Guadeloupe',
647 'uploader_id': 'Prefet971',
648 'duration': 47.48,
649 'timestamp': 1505803395,
650 'upload_date': '20170919',
651 'uploader_url': 'https://twitter.com/Prefet971',
652 'comment_count': int,
653 'repost_count': int,
654 'like_count': int,
655 'view_count': int,
656 'tags': ['Maria'],
657 'age_limit': 0,
658 },
659 'params': {
660 'skip_download': True, # requires ffmpeg
661 },
662 }, {
663 # card via api.twitter.com/1.1/videos/tweet/config
664 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
665 'info_dict': {
666 'id': '1001551417340022785',
667 'display_id': '1001551623938805763',
668 'ext': 'mp4',
669 'title': 're:.*?Shep is on a roll today.*?',
670 'thumbnail': r're:^https?://.*\.jpg',
671 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
672 'uploader': 'Lis Power',
673 'uploader_id': 'LisPower1',
674 'duration': 111.278,
675 'timestamp': 1527623489,
676 'upload_date': '20180529',
677 'uploader_url': 'https://twitter.com/LisPower1',
678 'comment_count': int,
679 'repost_count': int,
680 'like_count': int,
681 'view_count': int,
682 'tags': [],
683 'age_limit': 0,
684 },
685 'params': {
686 'skip_download': True, # requires ffmpeg
687 },
688 }, {
689 'url': 'https://twitter.com/foobar/status/1087791357756956680',
690 'info_dict': {
691 'id': '1087791272830607360',
692 'display_id': '1087791357756956680',
693 'ext': 'mp4',
694 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
695 'thumbnail': r're:^https?://.*\.jpg',
696 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
697 'uploader': 'X',
698 'uploader_id': 'X',
699 'duration': 61.567,
700 'timestamp': 1548184644,
701 'upload_date': '20190122',
702 'uploader_url': 'https://twitter.com/X',
703 'comment_count': int,
704 'repost_count': int,
705 'like_count': int,
706 'view_count': int,
707 'tags': [],
708 'age_limit': 0,
709 },
710 }, {
711 # not available in Periscope
712 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
713 'info_dict': {
714 'id': '1vOGwqejwoWxB',
715 'ext': 'mp4',
716 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
717 'uploader': 'Vivi',
718 'uploader_id': '1eVjYOLGkGrQL',
719 'thumbnail': r're:^https?://.*\.jpg',
720 'tags': ['EduTECH2019'],
721 'view_count': int,
722 },
723 'add_ie': ['TwitterBroadcast'],
724 }, {
725 # unified card
726 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
727 'info_dict': {
728 'id': '1349774757969989634',
729 'display_id': '1349794411333394432',
730 'ext': 'mp4',
731 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
732 'thumbnail': r're:^https?://.*\.jpg',
733 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
734 'uploader': 'Brooklyn Nets',
735 'uploader_id': 'BrooklynNets',
736 'duration': 324.484,
737 'timestamp': 1610651040,
738 'upload_date': '20210114',
739 'uploader_url': 'https://twitter.com/BrooklynNets',
740 'comment_count': int,
741 'repost_count': int,
742 'like_count': int,
743 'tags': [],
744 'age_limit': 0,
745 },
746 'params': {
747 'skip_download': True,
748 },
749 }, {
750 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
751 'info_dict': {
752 'id': '1577855447914409984',
753 'display_id': '1577855540407197696',
754 'ext': 'mp4',
755 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
756 'description': 'md5:b9c3699335447391d11753ab21c70a74',
757 'upload_date': '20221006',
758 'uploader': 'oshtru',
759 'uploader_id': 'oshtru',
760 'uploader_url': 'https://twitter.com/oshtru',
761 'thumbnail': r're:^https?://.*\.jpg',
762 'duration': 30.03,
763 'timestamp': 1665025050,
764 'comment_count': int,
765 'repost_count': int,
766 'like_count': int,
767 'view_count': int,
768 'tags': [],
769 'age_limit': 0,
770 },
771 'params': {'skip_download': True},
772 }, {
773 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
774 'info_dict': {
775 'id': '1577719286659006464',
776 'title': 'Ultima📛 | #вʟм - Test',
777 'description': 'Test https://t.co/Y3KEZD7Dad',
778 'uploader': 'Ultima📛 | #вʟм',
779 'uploader_id': 'UltimaShadowX',
780 'uploader_url': 'https://twitter.com/UltimaShadowX',
781 'upload_date': '20221005',
782 'timestamp': 1664992565,
783 'comment_count': int,
784 'repost_count': int,
785 'like_count': int,
786 'tags': [],
787 'age_limit': 0,
788 },
789 'playlist_count': 4,
790 'params': {'skip_download': True},
791 }, {
792 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
793 'info_dict': {
794 'id': '1575559336759263233',
795 'display_id': '1575560063510810624',
796 'ext': 'mp4',
797 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
798 'thumbnail': r're:^https?://.*\.jpg',
799 'description': 'md5:95aea692fda36a12081b9629b02daa92',
800 'uploader': 'Max Olson',
801 'uploader_id': 'MesoMax919',
802 'uploader_url': 'https://twitter.com/MesoMax919',
803 'duration': 21.321,
804 'timestamp': 1664477766,
805 'upload_date': '20220929',
806 'comment_count': int,
807 'repost_count': int,
808 'like_count': int,
809 'view_count': int,
810 'tags': ['HurricaneIan'],
811 'age_limit': 0,
812 },
813 }, {
814 # Adult content, fails if not logged in (GraphQL)
815 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
816 'info_dict': {
817 'id': '1575199163847000068',
818 'display_id': '1575199173472927762',
819 'ext': 'mp4',
820 'title': str,
821 'description': str,
822 'uploader': str,
823 'uploader_id': 'Rizdraws',
824 'uploader_url': 'https://twitter.com/Rizdraws',
825 'upload_date': '20220928',
826 'timestamp': 1664391723,
827 'thumbnail': r're:^https?://.+\.jpg',
828 'like_count': int,
829 'repost_count': int,
830 'comment_count': int,
831 'age_limit': 18,
832 'tags': []
833 },
834 'skip': 'Requires authentication',
835 }, {
836 # Playlist result only with auth
837 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
838 'playlist_mincount': 2,
839 'info_dict': {
840 'id': '1395079556562706435',
841 'title': str,
842 'tags': [],
843 'uploader': str,
844 'like_count': int,
845 'upload_date': '20210519',
846 'age_limit': 0,
847 'repost_count': int,
848 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
849 'uploader_id': 'Srirachachau',
850 'comment_count': int,
851 'uploader_url': 'https://twitter.com/Srirachachau',
852 'timestamp': 1621447860,
853 },
854 }, {
855 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
856 'playlist_mincount': 2,
857 'info_dict': {
858 'id': '1578353380363501568',
859 'title': str,
860 'uploader_id': 'DavidToons_',
861 'repost_count': int,
862 'like_count': int,
863 'uploader': str,
864 'timestamp': 1665143744,
865 'uploader_url': 'https://twitter.com/DavidToons_',
866 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
867 'tags': [],
868 'comment_count': int,
869 'upload_date': '20221007',
870 'age_limit': 0,
871 },
872 }, {
873 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
874 'playlist_count': 2,
875 'info_dict': {
876 'id': '1578401165338976258',
877 'title': str,
878 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
879 'uploader': str,
880 'uploader_id': 'primevideouk',
881 'timestamp': 1665155137,
882 'upload_date': '20221007',
883 'age_limit': 0,
884 'uploader_url': 'https://twitter.com/primevideouk',
885 'comment_count': int,
886 'repost_count': int,
887 'like_count': int,
888 'tags': ['TheRingsOfPower'],
889 },
890 }, {
891 # Twitter Spaces
892 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
893 'info_dict': {
894 'id': '1lPJqmBeeNAJb',
895 'ext': 'm4a',
896 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
897 'uploader': r're:Monique Camarra.+?',
898 'uploader_id': 'MoniqueCamarra',
899 'live_status': 'was_live',
900 'release_timestamp': 1658417414,
901 'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
902 'timestamp': 1658407771,
903 'release_date': '20220721',
904 'upload_date': '20220721',
905 },
906 'add_ie': ['TwitterSpaces'],
907 'params': {'skip_download': 'm3u8'},
908 'skip': 'Requires authentication',
909 }, {
910 # URL specifies video number but --yes-playlist
911 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
912 'playlist_mincount': 2,
913 'info_dict': {
914 'id': '1600649710662213632',
915 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
916 'timestamp': 1670459604.0,
917 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
918 'comment_count': int,
919 'uploader_id': 'CTVJLaidlaw',
920 'repost_count': int,
921 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
922 'upload_date': '20221208',
923 'age_limit': 0,
924 'uploader': 'Jocelyn Laidlaw',
925 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
926 'like_count': int,
927 },
928 }, {
929 # URL specifies video number and --no-playlist
930 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
931 'info_dict': {
932 'id': '1600649511827013632',
933 'ext': 'mp4',
934 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
935 'thumbnail': r're:^https?://.+\.jpg',
936 'timestamp': 1670459604.0,
937 'uploader_id': 'CTVJLaidlaw',
938 'uploader': 'Jocelyn Laidlaw',
939 'repost_count': int,
940 'comment_count': int,
941 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
942 'duration': 102.226,
943 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
944 'display_id': '1600649710662213632',
945 'like_count': int,
946 'view_count': int,
947 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
948 'upload_date': '20221208',
949 'age_limit': 0,
950 },
951 'params': {'noplaylist': True},
952 }, {
953 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
954 # note the id different between extraction and url
955 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
956 'info_dict': {
957 'id': '1621117577354424321',
958 'display_id': '1621117700482416640',
959 'ext': 'mp4',
960 'title': '뽀 - 아 최우제 이동속도 봐',
961 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
962 'duration': 24.598,
963 'uploader': '뽀',
964 'uploader_id': 's2FAKER',
965 'uploader_url': 'https://twitter.com/s2FAKER',
966 'upload_date': '20230202',
967 'timestamp': 1675339553.0,
968 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
969 'age_limit': 18,
970 'tags': [],
971 'like_count': int,
972 'repost_count': int,
973 'comment_count': int,
974 'view_count': int,
975 },
976 }, {
977 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
978 'info_dict': {
979 'id': '1599108643743473680',
980 'display_id': '1599108751385972737',
981 'ext': 'mp4',
982 'title': '\u06ea - \U0001F48B',
983 'uploader_url': 'https://twitter.com/hlo_again',
984 'like_count': int,
985 'uploader_id': 'hlo_again',
986 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
987 'repost_count': int,
988 'duration': 9.531,
989 'comment_count': int,
990 'view_count': int,
991 'upload_date': '20221203',
992 'age_limit': 0,
993 'timestamp': 1670092210.0,
994 'tags': [],
995 'uploader': '\u06ea',
996 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
997 },
998 'params': {'noplaylist': True},
999 }, {
1000 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1001 'info_dict': {
1002 'id': '1600009362759733248',
1003 'display_id': '1600009574919962625',
1004 'ext': 'mp4',
1005 'uploader_url': 'https://twitter.com/MunTheShinobi',
1006 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1007 'view_count': int,
1008 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1009 'age_limit': 0,
1010 'uploader': 'Mün The Friend Of YWAP',
1011 'repost_count': int,
1012 'upload_date': '20221206',
1013 'title': 'Mün The Friend Of YWAP - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1014 'comment_count': int,
1015 'like_count': int,
1016 'tags': [],
1017 'uploader_id': 'MunTheShinobi',
1018 'duration': 139.987,
1019 'timestamp': 1670306984.0,
1020 },
1021 }, {
1022 # url to retweet id w/ legacy api
1023 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1024 'info_dict': {
1025 'id': '1623274794488659969',
1026 'display_id': '1623739803874349067',
1027 'ext': 'mp4',
1028 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1029 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1030 'uploader': 'Johnny Bullets',
1031 'uploader_id': 'Johnnybull3ts',
1032 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1033 'age_limit': 0,
1034 'tags': [],
1035 'duration': 8.033,
1036 'timestamp': 1675853859.0,
1037 'upload_date': '20230208',
1038 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1039 'like_count': int,
1040 'repost_count': int,
1041 },
1042 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
1043 'skip': 'Protected tweet',
1044 }, {
1045 # orig tweet w/ graphql
1046 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1047 'info_dict': {
1048 'id': '1623274794488659969',
1049 'display_id': '1623739803874349067',
1050 'ext': 'mp4',
1051 'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy',
1052 'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a',
1053 'uploader': '@selfisekai@hackerspace.pl 🐀',
1054 'uploader_id': 'liberdalau',
1055 'uploader_url': 'https://twitter.com/liberdalau',
1056 'age_limit': 0,
1057 'tags': [],
1058 'duration': 8.033,
1059 'timestamp': 1675964711.0,
1060 'upload_date': '20230209',
1061 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1062 'like_count': int,
1063 'view_count': int,
1064 'repost_count': int,
1065 'comment_count': int,
1066 },
1067 'skip': 'Protected tweet',
1068 }, {
1069 # onion route
1070 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1071 'only_matching': True,
1072 }, {
1073 # Twitch Clip Embed
1074 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1075 'only_matching': True,
1076 }, {
1077 # promo_video_website card
1078 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1079 'only_matching': True,
1080 }, {
1081 # promo_video_convo card
1082 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1083 'only_matching': True,
1084 }, {
1085 # appplayer card
1086 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1087 'only_matching': True,
1088 }, {
1089 # video_direct_message card
1090 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1091 'only_matching': True,
1092 }, {
1093 # poll2choice_video card
1094 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1095 'only_matching': True,
1096 }, {
1097 # poll3choice_video card
1098 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1099 'only_matching': True,
1100 }, {
1101 # poll4choice_video card
1102 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1103 'only_matching': True,
1104 }]
1105
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL tweet payload into a legacy-API-shaped status dict.

    The tweet result is looked up in `data` using the threaded-conversation
    layout when `self.is_logged_in` is set and the `tweetResult` layout
    otherwise. Its 'legacy' dict is extended with 'user', 'card' and
    'quoted_status' entries, and the card's binding values are re-keyed
    from a list of key/value dicts into a single mapping.

    Raises ExtractorError (expected) for tombstoned or unavailable tweets.
    """
    if self.is_logged_in:
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        # Unwrap a nested 'tweet' object exactly like the logged-in branch
        # does, so that wrapped results are handled the same on both paths
        result = traverse_obj(
            data, ('tweetResult', 'result', ('tweet', None), {dict}),
            default={}, get_all=False)

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
    elif typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    status = result.get('legacy', {})
    status.update(traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={}))

    # extra transformation is needed since result does not match legacy format
    binding_values = {
        binding_value.get('key'): binding_value.get('value')
        for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1144
def _build_graphql_query(self, media_id):
    """Return the GraphQL query parameters used to look up tweet `media_id`.

    A different set of variables and feature flags is produced depending on
    `self.is_logged_in`; only the logged-out query carries 'fieldToggles'.
    """
    if not self.is_logged_in:
        return {
            'variables': {
                'tweetId': media_id,
                'withCommunity': False,
                'includePromotedContent': False,
                'withVoice': False,
            },
            'features': {
                'creator_subscriptions_tweet_preview_api_enabled': True,
                'tweetypie_unmention_optimization_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
                'view_counts_everywhere_api_enabled': True,
                'longform_notetweets_consumption_enabled': True,
                'responsive_web_twitter_article_tweet_consumption_enabled': False,
                'tweet_awards_web_tipping_enabled': False,
                'freedom_of_speech_not_reach_fetch_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
                'longform_notetweets_rich_text_read_enabled': True,
                'longform_notetweets_inline_media_enabled': True,
                'responsive_web_graphql_exclude_directive_enabled': True,
                'verified_phone_label_enabled': False,
                'responsive_web_media_download_video_enabled': False,
                'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
                'responsive_web_graphql_timeline_navigation_enabled': True,
                'responsive_web_enhance_cards_enabled': False,
            },
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    # Logged-in query
    return {
        'variables': {
            'focalTweetId': media_id,
            'includePromotedContent': True,
            'with_rux_injections': False,
            'withBirdwatchNotes': True,
            'withCommunity': True,
            'withDownvotePerspective': False,
            'withQuickPromoteEligibilityTweetFields': True,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withSuperFollowsTweetFields': True,
            'withSuperFollowsUserFields': True,
            'withV2Timeline': True,
            'withVoice': True,
        },
        'features': {
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_graphql_timeline_navigation_enabled': False,
            'responsive_web_text_conversations_enabled': False,
            'responsive_web_uc_gql_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'tweetypie_unmention_optimization_enabled': True,
            'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
            'verified_phone_label_enabled': False,
            'vibe_api_enabled': True,
        },
    }
1209
def _extract_status(self, twid):
    """Fetch the status dict for tweet `twid`.

    Logged-in sessions use the TweetDetail GraphQL query. Otherwise the
    TweetResultByRestId GraphQL query is used, or the legacy statuses API
    when the 'legacy_api' extractor argument is set. If that request fails
    with an unexpected ExtractorError, a warning is reported and the
    syndication endpoint is queried instead.
    """
    if self.is_logged_in:
        detail = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        return self._graphql_to_legacy(detail, twid)

    try:
        if self._configuration_arg('legacy_api'):
            legacy_status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            })
            # Prefer the retweeted status when present, else the status itself
            return traverse_obj(legacy_status, 'retweeted_status', None)

        guest_result = self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid)
        return self._graphql_to_legacy(guest_result, twid)

    except ExtractorError as error:
        # Expected errors are final; anything else triggers the fallback below
        if error.expected:
            raise
        self.report_warning(
            f'{error.orig_msg}. Falling back to syndication endpoint; some metadata may be missing', twid)

    syndication = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid})
    # Expose the media list under the key layout the legacy format uses
    syndication['extended_entities'] = {'media': syndication.get('mediaDetails')}
    return syndication
1238
def _real_extract(self, url):
    """Extract the video(s) attached to a single tweet.

    Returns an info dict for a single video, a playlist result when several
    entries are found, or a URL result pointing at the tweet's first
    expanded link when the tweet itself carries no media.

    Raises ExtractorError (expected) when a URL-specified media number is
    out of range or does not refer to a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-video part of the info dict from a media entity."""
        # Fall back to the ID embedded in a variant URL, then to the tweet ID
        media_id = traverse_obj(media, 'id_str', 'id', (
            'video_info', 'variants', ..., 'url',
            {functools.partial(re.search, r'_video/(\d+)/')}, 1
        ), get_all=False, expected_type=str_or_none) or twid
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card, if there is one."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            id_prefix = card_name if is_amplify else 'player'
            content_id = get_binding_value(f'{id_prefix}_content_id')
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # All non-photo media of the tweet itself and of the tweet it quotes
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The number in the URL is 1-based. Anything below 1 is rejected
        # explicitly, since a negative list index could otherwise select
        # media counted from the end (e.g. ".../video/0" -> last item)
        media_index = int(selected_index) - 1
        desired_obj = None
        if media_index >= 0:
            desired_obj = traverse_obj(status, (
                (None, 'quoted_status'), 'extended_entities', 'media', media_index, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media in the tweet: hand over to whatever it links to, if anything
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1424
1425
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for standalone Amplify video pages on amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's `twitter:*` meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the (width, height) pair advertised for `target`
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Only the first format is annotated with the player dimensions.
        # Guard against an empty list so that indexing cannot raise IndexError
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1481
1482
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> pages."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, then resolve its stream into m3u8 formats."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The format ID is taken from the playlist URL's 'type' query parameter
        url_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = url_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1519
1520
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for twitter.com/i/spaces/<id> audio rooms."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased Space state -> live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL query parameters for looking up `space_id`."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Fetch the Space's metadata and, where available, its audio formats."""
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        media_key = metadata.get('media_key')
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif media_key:
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)
            for fmt in formats:
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
1659
1660
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve t.co short links (and `tco:<id>` pseudo-URLs) to their target."""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches, and
    # the ID stops at a query string or fragment
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Request the short link and hand the final URL to another extractor."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # `tco:<id>` form: build the real short URL from the ID
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading warning-page prefix; any later
            # occurrence of the same text belongs to the target URL
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)