jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[extractor/twitter] Fix unauthenticated extraction (#7476)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import re
3 import urllib.error
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import (
8 compat_parse_qs,
9 compat_urllib_parse_unquote,
10 compat_urllib_parse_urlparse,
11 )
12 from ..utils import (
13 ExtractorError,
14 dict_get,
15 float_or_none,
16 format_field,
17 int_or_none,
18 make_archive_id,
19 remove_end,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
29 )
30
31
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors.

    Provides the login flow against ``onboarding/task.json``, authenticated
    calls to the legacy and GraphQL APIs, and helpers that turn media
    variants / VMAP documents into format lists.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Flow token from the most recent successful _call_login_api() response;
    # it is echoed back in the body of every subsequent login request.
    _flow_token = None

    # Request body sent with the first login call (flow_name=login) in _perform_login()
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        ``variant`` is a mapping with a ``url`` key and optionally a
        ``bitrate`` / ``bit_rate`` key. Returns a ``(formats, subtitles)``
        pair; both are empty when the variant has no URL.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            # HLS manifest: let the generic m3u8 helper expand it (non-fatal)
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # NOTE(review): 1000 is passed as int_or_none's second argument,
            # presumably a scale divisor (bits/s -> kbit/s) - confirm in utils.
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract formats from it.

        Every ``videoVariant`` element is processed, followed by the
        ``MediaFile`` URL if it was not already listed as a variant.
        Returns a ``(formats, subtitles)`` pair; both are empty when
        ``vmap_url`` is not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded in the XML; decode it in place
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # A missing MediaFile yields a falsy URL, for which
        # _extract_variant_formats() returns nothing.
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` (in place) from a
        ``/<width>x<height>/`` path segment in ``video_url``, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether the cookies for ``_API_BASE`` include an ``auth_token``."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _set_base_headers(self):
        """Return a fresh header dict: a copy of ``_AUTH`` plus
        ``x-csrf-token`` when a ``ct0`` cookie is available.

        Despite the name, nothing is stored; the caller receives the dict.
        """
        headers = self._AUTH.copy()
        csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
        if csrf_token:
            headers['x-csrf-token'] = csrf_token
        return headers

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow against ``onboarding/task.json``.

        Stores the returned flow token on ``self._flow_token`` and returns
        the ID of the next subtask (taken from the response's ``subtasks``).

        Raises ExtractorError if the API reports an error, if the response
        status is not ``'success'`` or if no next subtask is returned.
        """
        # HTTP 400 is accepted so that the error payload can be parsed below
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in with ``username``/``password`` by walking the login subtasks.

        Does nothing when an ``auth_token`` cookie is already present.
        Otherwise a guest token is obtained and login subtasks are answered
        one at a time until the ``auth_token`` cookie appears.

        Raises ExtractorError if the flow reports success without granting
        the cookie, or if an unknown subtask is requested (in addition to
        the errors raised by ``_call_login_api``).
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        headers = self._set_base_headers()
        # Prefer the guest token embedded in the page's cookie-setting script;
        # fall back to requesting one from the API.
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._download_json(
                f'{self._API_BASE}guest/activate.json', None, 'Downloading guest token',
                data=b'', headers=headers)['guest_token']
        headers.update({
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        })

        def build_login_json(*subtask_inputs):
            # Serialise the answers together with the current flow token
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Answer payload shared by the free-text subtasks
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'LoginSuccessSubtask':
                # The flow claims success, yet the loop condition shows that
                # no auth_token cookie has been set.
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Call the legacy (default) or GraphQL API and return the parsed JSON.

        Requires a logged-in session; otherwise ``raise_login_required()`` is
        invoked. Raises ExtractorError when the response has a non-empty
        ``errors`` entry.
        """
        if not self.is_logged_in:
            self.raise_login_required()

        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, video_id,
            f'Downloading {"GraphQL" if graphql else "legacy API"} JSON', headers={
                **self._set_base_headers(),
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            }, query=query or {}, expected_status={400, 401, 403, 404} if graphql else {403})

        if result.get('errors'):
            # dict.fromkeys() removes duplicate messages while keeping the
            # order reported by the API, so the error text is deterministic.
            errors = ', '.join(dict.fromkeys(traverse_obj(result, ('errors', ..., 'message', {str}))))
            raise ExtractorError(
                f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for ``media_id``.

        Must be overridden by subclasses that use ``_call_graphql_api``.
        """
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Query a GraphQL ``endpoint`` and return the ``data`` member of the response.

        Each value produced by ``_build_graphql_query`` is JSON-encoded
        before being passed as a query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
309
310
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card / embedded-video URLs.

    It performs no extraction of its own: the numeric ID is taken from the
    URL and the work is handed to TwitterIE via a ``statuses/<id>`` URL.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect a card/video URL to the TwitterIE status URL for the same ID."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
435
436
437 class TwitterIE(TwitterBaseIE):
438 IE_NAME = 'twitter'
439 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
440
441 _TESTS = [{
442 # comment_count, repost_count, view_count are only available with auth (applies to all tests)
443 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
444 'info_dict': {
445 'id': '643211870443208704',
446 'display_id': '643211948184596480',
447 'ext': 'mp4',
448 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
449 'thumbnail': r're:^https?://.*\.jpg',
450 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
451 'uploader': 'FREE THE NIPPLE',
452 'uploader_id': 'freethenipple',
453 'duration': 12.922,
454 'timestamp': 1442188653,
455 'upload_date': '20150913',
456 'uploader_url': 'https://twitter.com/freethenipple',
457 'like_count': int,
458 'tags': [],
459 'age_limit': 18,
460 },
461 }, {
462 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
463 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
464 'info_dict': {
465 'id': '657991469417025536',
466 'ext': 'mp4',
467 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
468 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
469 'thumbnail': r're:^https?://.*\.png',
470 'uploader': 'Gifs',
471 'uploader_id': 'giphz',
472 },
473 'expected_warnings': ['height', 'width'],
474 'skip': 'Account suspended',
475 }, {
476 'url': 'https://twitter.com/starwars/status/665052190608723968',
477 'info_dict': {
478 'id': '665052190608723968',
479 'display_id': '665052190608723968',
480 'ext': 'mp4',
481 'title': r're:Star Wars.*A new beginning is coming December 18.*',
482 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
483 'uploader_id': 'starwars',
484 'uploader': r're:Star Wars.*',
485 'timestamp': 1447395772,
486 'upload_date': '20151113',
487 'uploader_url': 'https://twitter.com/starwars',
488 'like_count': int,
489 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
490 'age_limit': 0,
491 },
492 }, {
493 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
494 'info_dict': {
495 'id': '705235433198714880',
496 'ext': 'mp4',
497 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
498 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
499 'uploader_id': 'BTNBrentYarina',
500 'uploader': 'Brent Yarina',
501 'timestamp': 1456976204,
502 'upload_date': '20160303',
503 'uploader_url': 'https://twitter.com/BTNBrentYarina',
504 'comment_count': int,
505 'repost_count': int,
506 'like_count': int,
507 'tags': [],
508 'age_limit': 0,
509 },
510 'params': {
511 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
512 # Test case of TwitterCardIE
513 'skip_download': True,
514 },
515 'skip': 'Dead external link',
516 }, {
517 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
518 'info_dict': {
519 'id': '700207414000242688',
520 'display_id': '700207533655363584',
521 'ext': 'mp4',
522 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
523 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
524 'thumbnail': r're:^https?://.*\.jpg',
525 'uploader': 'jaydin donte geer',
526 'uploader_id': 'jaydingeer',
527 'duration': 30.0,
528 'timestamp': 1455777459,
529 'upload_date': '20160218',
530 'uploader_url': 'https://twitter.com/jaydingeer',
531 'like_count': int,
532 'tags': ['Damndaniel'],
533 'age_limit': 0,
534 },
535 }, {
536 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
537 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
538 'info_dict': {
539 'id': 'MIOxnrUteUd',
540 'ext': 'mp4',
541 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
542 'uploader': 'TAKUMA',
543 'uploader_id': '1004126642786242560',
544 'timestamp': 1402826626,
545 'upload_date': '20140615',
546 'thumbnail': r're:^https?://.*\.jpg',
547 'alt_title': 'Vine by TAKUMA',
548 'comment_count': int,
549 'repost_count': int,
550 'like_count': int,
551 'view_count': int,
552 },
553 'add_ie': ['Vine'],
554 }, {
555 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
556 'info_dict': {
557 'id': '717462543795523584',
558 'display_id': '719944021058060289',
559 'ext': 'mp4',
560 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
561 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
562 'uploader_id': 'CaptainAmerica',
563 'uploader': 'Captain America',
564 'duration': 3.17,
565 'timestamp': 1460483005,
566 'upload_date': '20160412',
567 'uploader_url': 'https://twitter.com/CaptainAmerica',
568 'thumbnail': r're:^https?://.*\.jpg',
569 'like_count': int,
570 'tags': [],
571 'age_limit': 0,
572 },
573 }, {
574 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
575 'info_dict': {
576 'id': '1zqKVVlkqLaKB',
577 'ext': 'mp4',
578 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
579 'upload_date': '20160923',
580 'uploader_id': '1PmKqpJdOJQoY',
581 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
582 'timestamp': 1474613214,
583 'thumbnail': r're:^https?://.*\.jpg',
584 },
585 'add_ie': ['Periscope'],
586 }, {
587 # has mp4 formats via mobile API
588 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
589 'info_dict': {
590 'id': '852138619213144067',
591 'ext': 'mp4',
592 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
593 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
594 'uploader': 'عالم الأخبار',
595 'uploader_id': 'news_al3alm',
596 'duration': 277.4,
597 'timestamp': 1492000653,
598 'upload_date': '20170412',
599 },
600 'skip': 'Account suspended',
601 }, {
602 'url': 'https://twitter.com/i/web/status/910031516746514432',
603 'info_dict': {
604 'id': '910030238373089285',
605 'display_id': '910031516746514432',
606 'ext': 'mp4',
607 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
608 'thumbnail': r're:^https?://.*\.jpg',
609 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
610 'uploader': 'Préfet de Guadeloupe',
611 'uploader_id': 'Prefet971',
612 'duration': 47.48,
613 'timestamp': 1505803395,
614 'upload_date': '20170919',
615 'uploader_url': 'https://twitter.com/Prefet971',
616 'like_count': int,
617 'tags': ['Maria'],
618 'age_limit': 0,
619 },
620 'params': {
621 'skip_download': True, # requires ffmpeg
622 },
623 }, {
624 # card via api.twitter.com/1.1/videos/tweet/config
625 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
626 'info_dict': {
627 'id': '1001551417340022785',
628 'display_id': '1001551623938805763',
629 'ext': 'mp4',
630 'title': 're:.*?Shep is on a roll today.*?',
631 'thumbnail': r're:^https?://.*\.jpg',
632 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
633 'uploader': 'Lis Power',
634 'uploader_id': 'LisPower1',
635 'duration': 111.278,
636 'timestamp': 1527623489,
637 'upload_date': '20180529',
638 'uploader_url': 'https://twitter.com/LisPower1',
639 'like_count': int,
640 'tags': [],
641 'age_limit': 0,
642 },
643 'params': {
644 'skip_download': True, # requires ffmpeg
645 },
646 }, {
647 'url': 'https://twitter.com/foobar/status/1087791357756956680',
648 'info_dict': {
649 'id': '1087791272830607360',
650 'display_id': '1087791357756956680',
651 'ext': 'mp4',
652 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
653 'thumbnail': r're:^https?://.*\.jpg',
654 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
655 'uploader': 'Twitter',
656 'uploader_id': 'Twitter',
657 'duration': 61.567,
658 'timestamp': 1548184644,
659 'upload_date': '20190122',
660 'uploader_url': 'https://twitter.com/Twitter',
661 'like_count': int,
662 'tags': [],
663 'age_limit': 0,
664 },
665 }, {
666 # not available in Periscope
667 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
668 'info_dict': {
669 'id': '1vOGwqejwoWxB',
670 'ext': 'mp4',
671 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
672 'uploader': 'Vivi',
673 'uploader_id': '1eVjYOLGkGrQL',
674 'thumbnail': r're:^https?://.*\.jpg',
675 'tags': ['EduTECH2019'],
676 'view_count': int,
677 },
678 'add_ie': ['TwitterBroadcast'],
679 'skip': 'Requires authentication',
680 }, {
681 # unified card
682 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
683 'info_dict': {
684 'id': '1349774757969989634',
685 'display_id': '1349794411333394432',
686 'ext': 'mp4',
687 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
688 'thumbnail': r're:^https?://.*\.jpg',
689 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
690 'uploader': 'Brooklyn Nets',
691 'uploader_id': 'BrooklynNets',
692 'duration': 324.484,
693 'timestamp': 1610651040,
694 'upload_date': '20210114',
695 'uploader_url': 'https://twitter.com/BrooklynNets',
696 'like_count': int,
697 'tags': [],
698 'age_limit': 0,
699 },
700 'params': {
701 'skip_download': True,
702 },
703 }, {
704 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
705 'info_dict': {
706 'id': '1577855447914409984',
707 'display_id': '1577855540407197696',
708 'ext': 'mp4',
709 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
710 'description': 'md5:b9c3699335447391d11753ab21c70a74',
711 'upload_date': '20221006',
712 'uploader': 'oshtru',
713 'uploader_id': 'oshtru',
714 'uploader_url': 'https://twitter.com/oshtru',
715 'thumbnail': r're:^https?://.*\.jpg',
716 'duration': 30.03,
717 'timestamp': 1665025050,
718 'like_count': int,
719 'tags': [],
720 'age_limit': 0,
721 },
722 'params': {'skip_download': True},
723 }, {
724 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
725 'info_dict': {
726 'id': '1577719286659006464',
727 'title': 'Ultima📛 | #вʟм - Test',
728 'description': 'Test https://t.co/Y3KEZD7Dad',
729 'uploader': 'Ultima📛 | #вʟм',
730 'uploader_id': 'UltimaShadowX',
731 'uploader_url': 'https://twitter.com/UltimaShadowX',
732 'upload_date': '20221005',
733 'timestamp': 1664992565,
734 'like_count': int,
735 'tags': [],
736 'age_limit': 0,
737 },
738 'playlist_count': 4,
739 'params': {'skip_download': True},
740 }, {
741 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
742 'info_dict': {
743 'id': '1575559336759263233',
744 'display_id': '1575560063510810624',
745 'ext': 'mp4',
746 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
747 'thumbnail': r're:^https?://.*\.jpg',
748 'description': 'md5:95aea692fda36a12081b9629b02daa92',
749 'uploader': 'Max Olson',
750 'uploader_id': 'MesoMax919',
751 'uploader_url': 'https://twitter.com/MesoMax919',
752 'duration': 21.321,
753 'timestamp': 1664477766,
754 'upload_date': '20220929',
755 'like_count': int,
756 'tags': ['HurricaneIan'],
757 'age_limit': 0,
758 },
759 }, {
760 # Adult content, fails if not logged in (GraphQL)
761 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
762 'info_dict': {
763 'id': '1575199163847000068',
764 'display_id': '1575199173472927762',
765 'ext': 'mp4',
766 'title': str,
767 'description': str,
768 'uploader': str,
769 'uploader_id': 'Rizdraws',
770 'uploader_url': 'https://twitter.com/Rizdraws',
771 'upload_date': '20220928',
772 'timestamp': 1664391723,
773 'thumbnail': r're:^https?://.+\.jpg',
774 'like_count': int,
775 'repost_count': int,
776 'comment_count': int,
777 'age_limit': 18,
778 'tags': []
779 },
780 'skip': 'Requires authentication',
781 }, {
782 # Single Vimeo video result without auth
783 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
784 'info_dict': {
785 'id': '551578322',
786 'ext': 'mp4',
787 'title': 'Dusty & The Mayor',
788 'uploader': 'Michael Chau',
789 'uploader_id': 'user29061007',
790 'uploader_url': 'https://vimeo.com/user29061007',
791 'duration': 478,
792 'thumbnail': 'https://i.vimeocdn.com/video/1139658575-0dfdce6e9a2401fe09feb24bf0d14e6f24a53c12f447ff688ace61009ad4c1ba-d_1280',
793 },
794 }, {
795 # Playlist result only with auth
796 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
797 'playlist_mincount': 2,
798 'info_dict': {
799 'id': '1395079556562706435',
800 'title': str,
801 'tags': [],
802 'uploader': str,
803 'like_count': int,
804 'upload_date': '20210519',
805 'age_limit': 0,
806 'repost_count': int,
807 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
808 'uploader_id': 'Srirachachau',
809 'comment_count': int,
810 'uploader_url': 'https://twitter.com/Srirachachau',
811 'timestamp': 1621447860,
812 },
813 'skip': 'Requires authentication',
814 }, {
815 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
816 'playlist_mincount': 2,
817 'info_dict': {
818 'id': '1578353380363501568',
819 'title': str,
820 'uploader_id': 'DavidToons_',
821 'repost_count': int,
822 'like_count': int,
823 'uploader': str,
824 'timestamp': 1665143744,
825 'uploader_url': 'https://twitter.com/DavidToons_',
826 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
827 'tags': [],
828 'comment_count': int,
829 'upload_date': '20221007',
830 'age_limit': 0,
831 },
832 'skip': 'Requires authentication',
833 }, {
834 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
835 'playlist_count': 2,
836 'info_dict': {
837 'id': '1578401165338976258',
838 'title': str,
839 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
840 'uploader': str,
841 'uploader_id': 'primevideouk',
842 'timestamp': 1665155137,
843 'upload_date': '20221007',
844 'age_limit': 0,
845 'uploader_url': 'https://twitter.com/primevideouk',
846 'like_count': int,
847 'tags': ['TheRingsOfPower'],
848 },
849 }, {
850 # Twitter Spaces
851 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
852 'info_dict': {
853 'id': '1lPJqmBeeNAJb',
854 'ext': 'm4a',
855 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
856 'uploader': r're:Monique Camarra.+?',
857 'uploader_id': 'MoniqueCamarra',
858 'live_status': 'was_live',
859 'release_timestamp': 1658417414,
860 'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
861 'timestamp': 1658407771,
862 'release_date': '20220721',
863 'upload_date': '20220721',
864 },
865 'add_ie': ['TwitterSpaces'],
866 'params': {'skip_download': 'm3u8'},
867 'skip': 'Requires authentication',
868 }, {
869 # URL specifies video number but --yes-playlist
870 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
871 'playlist_mincount': 2,
872 'info_dict': {
873 'id': '1600649710662213632',
874 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
875 'timestamp': 1670459604.0,
876 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
877 'uploader_id': 'CTVJLaidlaw',
878 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
879 'upload_date': '20221208',
880 'age_limit': 0,
881 'uploader': 'Jocelyn Laidlaw',
882 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
883 'like_count': int,
884 },
885 }, {
886 # URL specifies video number and --no-playlist
887 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
888 'info_dict': {
889 'id': '1600649511827013632',
890 'ext': 'mp4',
891 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
892 'thumbnail': r're:^https?://.+\.jpg',
893 'timestamp': 1670459604.0,
894 'uploader_id': 'CTVJLaidlaw',
895 'uploader': 'Jocelyn Laidlaw',
896 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
897 'duration': 102.226,
898 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
899 'display_id': '1600649710662213632',
900 'like_count': int,
901 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
902 'upload_date': '20221208',
903 'age_limit': 0,
904 },
905 'params': {'noplaylist': True},
906 }, {
907 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
908 # note the id different between extraction and url
909 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
910 'info_dict': {
911 'id': '1621117577354424321',
912 'display_id': '1621117700482416640',
913 'ext': 'mp4',
914 'title': '뽀 - 아 최우제 이동속도 봐',
915 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
916 'duration': 24.598,
917 'uploader': '뽀',
918 'uploader_id': 's2FAKER',
919 'uploader_url': 'https://twitter.com/s2FAKER',
920 'upload_date': '20230202',
921 'timestamp': 1675339553.0,
922 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
923 'age_limit': 18,
924 'tags': [],
925 'like_count': int,
926 },
927 }, {
928 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
929 'info_dict': {
930 'id': '1599108643743473680',
931 'display_id': '1599108751385972737',
932 'ext': 'mp4',
933 'title': '\u06ea - \U0001F48B',
934 'uploader_url': 'https://twitter.com/hlo_again',
935 'like_count': int,
936 'uploader_id': 'hlo_again',
937 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
938 'duration': 9.531,
939 'upload_date': '20221203',
940 'age_limit': 0,
941 'timestamp': 1670092210.0,
942 'tags': [],
943 'uploader': '\u06ea',
944 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
945 },
946 'params': {'noplaylist': True},
947 }, {
948 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
949 'info_dict': {
950 'id': '1600009362759733248',
951 'display_id': '1600009574919962625',
952 'ext': 'mp4',
953 'uploader_url': 'https://twitter.com/MunTheShinobi',
954 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
955 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
956 'age_limit': 0,
957 'uploader': 'Mün The Shinobi',
958 'upload_date': '20221206',
959 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
960 'like_count': int,
961 'tags': [],
962 'uploader_id': 'MunTheShinobi',
963 'duration': 139.987,
964 'timestamp': 1670306984.0,
965 },
966 }, {
967 # url to retweet id
968 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
969 'info_dict': {
970 'id': '1623274794488659969',
971 'display_id': '1623739803874349067',
972 'ext': 'mp4',
973 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
974 'description': 'md5:224d62f54b0cdef8e33d4c56c41ac503',
975 'uploader': 'Johnny Bullets',
976 'uploader_id': 'Johnnybull3ts',
977 'uploader_url': 'https://twitter.com/Johnnybull3ts',
978 'age_limit': 0,
979 'tags': [],
980 'duration': 8.033,
981 'timestamp': 1675853859.0,
982 'upload_date': '20230208',
983 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
984 'like_count': int,
985 },
986 }, {
987 # onion route
988 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
989 'only_matching': True,
990 }, {
991 # Twitch Clip Embed
992 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
993 'only_matching': True,
994 }, {
995 # promo_video_website card
996 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
997 'only_matching': True,
998 }, {
999 # promo_video_convo card
1000 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1001 'only_matching': True,
1002 }, {
1003 # appplayer card
1004 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1005 'only_matching': True,
1006 }, {
1007 # video_direct_message card
1008 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1009 'only_matching': True,
1010 }, {
1011 # poll2choice_video card
1012 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1013 'only_matching': True,
1014 }, {
1015 # poll3choice_video card
1016 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1017 'only_matching': True,
1018 }, {
1019 # poll4choice_video card
1020 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1021 'only_matching': True,
1022 }]
1023
def _graphql_to_legacy(self, data, twid):
    """Reshape a TweetDetail GraphQL response into a legacy-style status dict.

    Looks up the timeline entry whose ``entryId`` is ``tweet-<twid>``, takes its
    ``legacy`` payload and attaches the ``user``, ``card`` and ``quoted_status``
    sub-objects to it. Raises ExtractorError when the entry is a tombstone.
    Note that the ``legacy`` dict inside *data* is updated in place and returned.
    """
    wanted_entry_id = f'tweet-{twid}'
    tweet = traverse_obj(data, (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == wanted_entry_id, 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    ), expected_type=dict, default={}, get_all=False)

    typename = tweet.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet:
        # The tombstone text ends with a "Learn more" link label; drop it from the message
        reason = traverse_obj(tweet, ('tombstone', 'text', 'text', {str}))
        reason = remove_end(reason, '. Learn more')
        raise ExtractorError(f'Twitter API says: {reason or "Unknown error"}', expected=True)

    legacy = tweet.get('legacy', {})
    nested_paths = {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }
    legacy.update(traverse_obj(tweet, nested_paths, expected_type=dict, default={}))

    # GraphQL lists the card bindings as {'key': ..., 'value': ...} items,
    # whereas the legacy format is a mapping keyed by binding name
    bindings_by_key = {}
    for item in traverse_obj(legacy, ('card', 'binding_values', ..., {dict})):
        bindings_by_key[item.get('key')] = item.get('value')
    if bindings_by_key:
        legacy['card']['binding_values'] = bindings_by_key

    return legacy
1054
1055 def _build_graphql_query(self, media_id):
1056 return {
1057 'variables': {
1058 'focalTweetId': media_id,
1059 'includePromotedContent': True,
1060 'with_rux_injections': False,
1061 'withBirdwatchNotes': True,
1062 'withCommunity': True,
1063 'withDownvotePerspective': False,
1064 'withQuickPromoteEligibilityTweetFields': True,
1065 'withReactionsMetadata': False,
1066 'withReactionsPerspective': False,
1067 'withSuperFollowsTweetFields': True,
1068 'withSuperFollowsUserFields': True,
1069 'withV2Timeline': True,
1070 'withVoice': True,
1071 },
1072 'features': {
1073 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1074 'interactive_text_enabled': True,
1075 'responsive_web_edit_tweet_api_enabled': True,
1076 'responsive_web_enhance_cards_enabled': True,
1077 'responsive_web_graphql_timeline_navigation_enabled': False,
1078 'responsive_web_text_conversations_enabled': False,
1079 'responsive_web_uc_gql_enabled': True,
1080 'standardized_nudges_misinfo': True,
1081 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1082 'tweetypie_unmention_optimization_enabled': True,
1083 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1084 'verified_phone_label_enabled': False,
1085 'vibe_api_enabled': True,
1086 },
1087 }
1088
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet addressed by *url*.

    When not logged in, the tweet is fetched from the syndication endpoint;
    otherwise from the TweetDetail GraphQL call, converted to the legacy layout.

    Returns one of:
      * a single info dict (exactly one video, or a specific video selected
        through the URL index together with ``noplaylist``),
      * a playlist result (several videos),
      * a URL result pointing at the tweet's first expanded URL when the tweet
        itself carries no media,
      * the bare metadata dict after ``raise_no_formats`` when nothing is found.

    Raises ExtractorError when the selected media index does not exist or is
    not a video, and re-raises download errors (a 404 from the syndication
    endpoint is reported as "login required").
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if not self.is_logged_in:
        try:
            status = self._download_json(
                'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
                headers={'User-Agent': 'Googlebot'}, query={'id': twid})
            self.to_screen(f'Some metadata is missing without authentication. {self._login_hint()}')
        except ExtractorError as e:
            if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
                self.raise_login_required('Requested tweet may only be available when logged in')
            raise
    else:
        status = self._graphql_to_legacy(
            self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)

    # The tweet text lives under 'full_text' or 'text' depending on the response
    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry produced from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-video part of the info dict from one media object."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        if not media_id:
            # workaround for non-authenticated responses:
            # recover the media id from the first variant URL that contains it
            media_id = traverse_obj(media, (
                'video_info', 'variants', ..., 'url',
                {lambda x: re.search(r'_video/(\d+)/', x)[1]}), get_all=False)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            # 'sizes' may be present but null, so do not rely on the .get() default alone
            for name, size in (media.get('sizes') or {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id or twid,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card, dispatching on the card name."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value', e.g. 'string_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # All non-photo media of the tweet (and of its quoted tweet), whichever
    # of the two response layouts is present
    videos = traverse_obj(status, (
        ('mediaDetails', ((None, 'quoted_status'), 'extended_entities', 'media')),
        lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The URL index counts every media item (photos included), 1-based
        desired_obj = traverse_obj(status, (
            ('mediaDetails', ((None, 'quoted_status'), 'extended_entities', 'media')),
            int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title.
        # Media objects may lack 'id' (see the fallback in extract_from_video_info);
        # comparing ids alone would then treat None == None as a match and always
        # pick the first video. Both lookups above yield the very same dict objects
        # out of `status`, so identity is the reliable test; the id comparison is
        # kept for responses that do provide one.
        desired_id = desired_obj.get('id')
        for index, entry in enumerate(videos, 1):
            is_desired = entry is desired_obj or (
                desired_id is not None and entry.get('id') == desired_id)
            if not is_desired:
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media in the tweet itself: fall back to the first linked URL, if any
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1290
1291
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for videos served from amp.twimg.com pages."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page, read its ``twitter:*`` meta tags and build the info dict.

        Formats come from the VMAP document referenced by the
        ``twitter:amplify:vmap`` meta tag; the thumbnail and the player
        dimensions come from the ``twitter:image*`` / ``twitter:player*`` tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The VMAP URL is mandatory: without it there is nothing to extract, so
        # fail here with a clear message instead of passing None further down
        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url', fatal=True)
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        def _find_dimension(target):
            """Return (width, height) from the ``twitter:<target>:width/height`` meta tags."""
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Only the first format is annotated with the player dimensions;
        # guard against an empty format list to avoid an IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1347
1348
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` URLs."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Fetch the broadcast record and its stream status, then build the formats."""
        broadcast_id = self._match_id(url)

        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's 'type' query parameter, when present, names the format
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1385
1386
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` URLs (audio-only)."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased metadata 'state' to the live_status reported in the result
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for the Space *space_id*."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Query the Space, resolve its stream when one is available and return the info dict."""
        space_id = self._match_id(url)
        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', live=is_live, fatal=False,
                    headers={'Referer': 'https://twitter.com/'})
            else:
                formats = []
            for fmt in formats:
                # Spaces are audio-only
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # The metadata timestamps are divided by 1000 before being reported
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1489
1490
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<id>`` pseudo-URLs) to their target URL."""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches, and the
    # id stops at '?' or '#' so that a URL fragment is not taken as part of it
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL over via ``url_result``.

        For the ``tco:<id>`` form the real short URL is rebuilt from ``_BASE_URL``.
        When the redirect lands on Twitter's unsafe-link warning page, the wrapped
        target URL is used instead.
        """
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()

        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading warning-page prefix, keeping the rest verbatim
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)