]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[extractor/youtube] Ignore incomplete data for comment threads by default (#7475)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from .periscope import PeriscopeBaseIE, PeriscopeIE
6 from ..compat import (
7 compat_parse_qs,
8 compat_urllib_parse_unquote,
9 compat_urllib_parse_urlparse,
10 )
11 from ..utils import (
12 ExtractorError,
13 dict_get,
14 float_or_none,
15 format_field,
16 int_or_none,
17 make_archive_id,
18 remove_end,
19 str_or_none,
20 strip_or_none,
21 traverse_obj,
22 try_call,
23 try_get,
24 unified_timestamp,
25 update_url_query,
26 url_or_none,
27 xpath_text,
28 )
29
30
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors.

    Provides format extraction from media variants / VMAP documents, the
    multi-step login flow, guest-token handling and wrappers around the
    legacy (1.1) and GraphQL APIs.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer token sent with every API request (see _set_base_headers)
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Set by _fetch_guest_token; reset by _call_api when the API rejects it
    _guest_token = None
    # Updated by _call_login_api after every successful login step
    _flow_token = None

    # Request body for the first login call (the one issued with flow_name=login)
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build the formats for a single media variant.

        @param variant   mapping with a 'url' and optionally 'bitrate'/'bit_rate'
        @param video_id  ID passed on to the m3u8 extraction
        @returns         (formats, subtitles); both empty if the variant has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # NOTE(review): the 1000 is presumably the scale (bps -> kbps) - confirm against utils.int_or_none
            # "or None" turns a zero bitrate into "unknown"
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract formats from it.

        Every videoVariant element is processed; the MediaFile URL is used as
        an additional source when it was not already listed among the variants.

        @returns  (formats, subtitles); both empty if vmap_url is not a valid URL
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded; it is decoded in place on the element
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # Nothing to add when there is no MediaFile URL or it duplicates a variant
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width' and 'height' on a_format if video_url has a /<W>x<H>/ path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an 'auth_token' cookie is present for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _fetch_guest_token(self, headers, display_id):
        """Request a new guest token and store it in self._guest_token.

        Any 'x-guest-token' entry is removed from headers (in place) before the
        request is made.

        @raises ExtractorError  if the response contains no guest token
        """
        headers.pop('x-guest-token', None)
        self._guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id,
            'Downloading guest token', data=b'', headers=headers), 'guest_token')
        if not self._guest_token:
            raise ExtractorError('Could not retrieve guest token')

    def _set_base_headers(self):
        """Return a fresh header dict: the bearer token plus 'x-csrf-token' if a 'ct0' cookie exists."""
        headers = self._AUTH.copy()
        csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
        if csrf_token:
            headers['x-csrf-token'] = csrf_token
        return headers

    def _call_login_api(self, note, headers, query={}, data=None):
        """Perform one step of the login flow.

        On success the returned flow token is stored in self._flow_token.

        @returns                the ID of the next subtask requested by the API
        @raises ExtractorError  if the API reports an error, the status is not
                                'success', or the subtask ID / flow token is missing
        """
        # HTTP 400 is accepted so that the error message in the body can be reported
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        # Fail with a proper extractor error instead of a bare KeyError
        flow_token = traverse_obj(response, 'flow_token')
        if not flow_token:
            raise ExtractorError('Twitter API did not return flow token')
        self._flow_token = flow_token

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering the subtasks requested by the API one after another.

        Does nothing if an auth token cookie is already present. The loop ends
        once is_logged_in becomes true.

        @raises ExtractorError  if the flow reports success without granting
                                the cookie, or requests an unknown subtask
        """
        if self.is_logged_in:
            return

        self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
        headers = self._set_base_headers()
        self._fetch_guest_token(headers, None)
        headers.update({
            'content-type': 'application/json',
            'x-guest-token': self._guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        })

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, so each step sends the latest token
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shared by all subtasks that submit a single text value
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'LoginSuccessSubtask':
                # Only reachable while still not logged in, i.e. no auth_token cookie was set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query={}, graphql=False):
        """Query the legacy API, or the GraphQL API if graphql is true.

        When not logged in, a guest token is fetched on demand and the request
        is retried once with a new token if the API reports a bad guest token.

        @returns                the parsed JSON response
        @raises ExtractorError  if the response contains errors
        """
        headers = self._set_base_headers()
        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        for first_attempt in (True, False):
            if not self.is_logged_in:
                if not self._guest_token:
                    self._fetch_guest_token(headers, video_id)
                headers['x-guest-token'] = self._guest_token

            # These statuses are accepted so that the errors in the body can be reported below
            allowed_status = {400, 401, 403, 404} if graphql else {403}
            result = self._download_json(
                (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
                video_id, headers=headers, query=query, expected_status=allowed_status,
                note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

            if result.get('errors'):
                # dict.fromkeys drops duplicates but keeps the order given by the API,
                # so the reported message is the same on every run (a set is unordered)
                errors = ', '.join(dict.fromkeys(
                    traverse_obj(result, ('errors', ..., 'message', {str}))))
                if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
                    self.to_screen('Guest token has expired. Refreshing guest token')
                    # Forces a new token to be fetched on the second attempt
                    self._guest_token = None
                    continue

                raise ExtractorError(
                    f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

            return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for media_id; to be overridden by subclasses."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the 'data' member of the response.

        Each value from _build_graphql_query is sent as a compact JSON string.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
327
328
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card and embedded video URLs.

    No extraction happens here: the tweet ID is taken from the URL and the
    corresponding status URL is handed over to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to TwitterIE using the status URL built from the matched tweet ID."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
453
454
455 class TwitterIE(TwitterBaseIE):
456 IE_NAME = 'twitter'
457 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
458
459 _TESTS = [{
460 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
461 'info_dict': {
462 'id': '643211870443208704',
463 'display_id': '643211948184596480',
464 'ext': 'mp4',
465 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
466 'thumbnail': r're:^https?://.*\.jpg',
467 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
468 'uploader': 'FREE THE NIPPLE',
469 'uploader_id': 'freethenipple',
470 'duration': 12.922,
471 'timestamp': 1442188653,
472 'upload_date': '20150913',
473 'uploader_url': 'https://twitter.com/freethenipple',
474 'comment_count': int,
475 'repost_count': int,
476 'like_count': int,
477 'view_count': int,
478 'tags': [],
479 'age_limit': 18,
480 },
481 }, {
482 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
483 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
484 'info_dict': {
485 'id': '657991469417025536',
486 'ext': 'mp4',
487 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
488 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
489 'thumbnail': r're:^https?://.*\.png',
490 'uploader': 'Gifs',
491 'uploader_id': 'giphz',
492 },
493 'expected_warnings': ['height', 'width'],
494 'skip': 'Account suspended',
495 }, {
496 'url': 'https://twitter.com/starwars/status/665052190608723968',
497 'info_dict': {
498 'id': '665052190608723968',
499 'display_id': '665052190608723968',
500 'ext': 'mp4',
501 'title': r're:Star Wars.*A new beginning is coming December 18.*',
502 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
503 'uploader_id': 'starwars',
504 'uploader': r're:Star Wars.*',
505 'timestamp': 1447395772,
506 'upload_date': '20151113',
507 'uploader_url': 'https://twitter.com/starwars',
508 'comment_count': int,
509 'repost_count': int,
510 'like_count': int,
511 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
512 'age_limit': 0,
513 },
514 }, {
515 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
516 'info_dict': {
517 'id': '705235433198714880',
518 'ext': 'mp4',
519 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
520 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
521 'uploader_id': 'BTNBrentYarina',
522 'uploader': 'Brent Yarina',
523 'timestamp': 1456976204,
524 'upload_date': '20160303',
525 'uploader_url': 'https://twitter.com/BTNBrentYarina',
526 'comment_count': int,
527 'repost_count': int,
528 'like_count': int,
529 'tags': [],
530 'age_limit': 0,
531 },
532 'params': {
533 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
534 # Test case of TwitterCardIE
535 'skip_download': True,
536 },
537 'skip': 'Dead external link',
538 }, {
539 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
540 'info_dict': {
541 'id': '700207414000242688',
542 'display_id': '700207533655363584',
543 'ext': 'mp4',
544 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
545 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
546 'thumbnail': r're:^https?://.*\.jpg',
547 'uploader': 'jaydin donte geer',
548 'uploader_id': 'jaydingeer',
549 'duration': 30.0,
550 'timestamp': 1455777459,
551 'upload_date': '20160218',
552 'uploader_url': 'https://twitter.com/jaydingeer',
553 'comment_count': int,
554 'repost_count': int,
555 'like_count': int,
556 'view_count': int,
557 'tags': ['Damndaniel'],
558 'age_limit': 0,
559 },
560 }, {
561 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
562 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
563 'info_dict': {
564 'id': 'MIOxnrUteUd',
565 'ext': 'mp4',
566 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
567 'uploader': 'TAKUMA',
568 'uploader_id': '1004126642786242560',
569 'timestamp': 1402826626,
570 'upload_date': '20140615',
571 'thumbnail': r're:^https?://.*\.jpg',
572 'alt_title': 'Vine by TAKUMA',
573 'comment_count': int,
574 'repost_count': int,
575 'like_count': int,
576 'view_count': int,
577 },
578 'add_ie': ['Vine'],
579 }, {
580 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
581 'info_dict': {
582 'id': '717462543795523584',
583 'display_id': '719944021058060289',
584 'ext': 'mp4',
585 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
586 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
587 'uploader_id': 'CaptainAmerica',
588 'uploader': 'Captain America',
589 'duration': 3.17,
590 'timestamp': 1460483005,
591 'upload_date': '20160412',
592 'uploader_url': 'https://twitter.com/CaptainAmerica',
593 'thumbnail': r're:^https?://.*\.jpg',
594 'comment_count': int,
595 'repost_count': int,
596 'like_count': int,
597 'view_count': int,
598 'tags': [],
599 'age_limit': 0,
600 },
601 }, {
602 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
603 'info_dict': {
604 'id': '1zqKVVlkqLaKB',
605 'ext': 'mp4',
606 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
607 'upload_date': '20160923',
608 'uploader_id': '1PmKqpJdOJQoY',
609 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
610 'timestamp': 1474613214,
611 'thumbnail': r're:^https?://.*\.jpg',
612 },
613 'add_ie': ['Periscope'],
614 }, {
615 # has mp4 formats via mobile API
616 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
617 'info_dict': {
618 'id': '852138619213144067',
619 'ext': 'mp4',
620 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
621 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
622 'uploader': 'عالم الأخبار',
623 'uploader_id': 'news_al3alm',
624 'duration': 277.4,
625 'timestamp': 1492000653,
626 'upload_date': '20170412',
627 },
628 'skip': 'Account suspended',
629 }, {
630 'url': 'https://twitter.com/i/web/status/910031516746514432',
631 'info_dict': {
632 'id': '910030238373089285',
633 'display_id': '910031516746514432',
634 'ext': 'mp4',
635 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
636 'thumbnail': r're:^https?://.*\.jpg',
637 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
638 'uploader': 'Préfet de Guadeloupe',
639 'uploader_id': 'Prefet971',
640 'duration': 47.48,
641 'timestamp': 1505803395,
642 'upload_date': '20170919',
643 'uploader_url': 'https://twitter.com/Prefet971',
644 'comment_count': int,
645 'repost_count': int,
646 'like_count': int,
647 'view_count': int,
648 'tags': ['Maria'],
649 'age_limit': 0,
650 },
651 'params': {
652 'skip_download': True, # requires ffmpeg
653 },
654 }, {
655 # card via api.twitter.com/1.1/videos/tweet/config
656 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
657 'info_dict': {
658 'id': '1001551417340022785',
659 'display_id': '1001551623938805763',
660 'ext': 'mp4',
661 'title': 're:.*?Shep is on a roll today.*?',
662 'thumbnail': r're:^https?://.*\.jpg',
663 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
664 'uploader': 'Lis Power',
665 'uploader_id': 'LisPower1',
666 'duration': 111.278,
667 'timestamp': 1527623489,
668 'upload_date': '20180529',
669 'uploader_url': 'https://twitter.com/LisPower1',
670 'comment_count': int,
671 'repost_count': int,
672 'like_count': int,
673 'view_count': int,
674 'tags': [],
675 'age_limit': 0,
676 },
677 'params': {
678 'skip_download': True, # requires ffmpeg
679 },
680 }, {
681 'url': 'https://twitter.com/foobar/status/1087791357756956680',
682 'info_dict': {
683 'id': '1087791272830607360',
684 'display_id': '1087791357756956680',
685 'ext': 'mp4',
686 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
687 'thumbnail': r're:^https?://.*\.jpg',
688 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
689 'uploader': 'Twitter',
690 'uploader_id': 'Twitter',
691 'duration': 61.567,
692 'timestamp': 1548184644,
693 'upload_date': '20190122',
694 'uploader_url': 'https://twitter.com/Twitter',
695 'comment_count': int,
696 'repost_count': int,
697 'like_count': int,
698 'view_count': int,
699 'tags': [],
700 'age_limit': 0,
701 },
702 }, {
703 # not available in Periscope
704 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
705 'info_dict': {
706 'id': '1vOGwqejwoWxB',
707 'ext': 'mp4',
708 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
709 'uploader': 'Vivi',
710 'uploader_id': '1eVjYOLGkGrQL',
711 'thumbnail': r're:^https?://.*\.jpg',
712 'tags': ['EduTECH2019'],
713 'view_count': int,
714 },
715 'add_ie': ['TwitterBroadcast'],
716 }, {
717 # unified card
718 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
719 'info_dict': {
720 'id': '1349774757969989634',
721 'display_id': '1349794411333394432',
722 'ext': 'mp4',
723 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
724 'thumbnail': r're:^https?://.*\.jpg',
725 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
726 'uploader': 'Brooklyn Nets',
727 'uploader_id': 'BrooklynNets',
728 'duration': 324.484,
729 'timestamp': 1610651040,
730 'upload_date': '20210114',
731 'uploader_url': 'https://twitter.com/BrooklynNets',
732 'comment_count': int,
733 'repost_count': int,
734 'like_count': int,
735 'tags': [],
736 'age_limit': 0,
737 },
738 'params': {
739 'skip_download': True,
740 },
741 }, {
742 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
743 'info_dict': {
744 'id': '1577855447914409984',
745 'display_id': '1577855540407197696',
746 'ext': 'mp4',
747 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
748 'description': 'md5:b9c3699335447391d11753ab21c70a74',
749 'upload_date': '20221006',
750 'uploader': 'oshtru',
751 'uploader_id': 'oshtru',
752 'uploader_url': 'https://twitter.com/oshtru',
753 'thumbnail': r're:^https?://.*\.jpg',
754 'duration': 30.03,
755 'timestamp': 1665025050,
756 'comment_count': int,
757 'repost_count': int,
758 'like_count': int,
759 'view_count': int,
760 'tags': [],
761 'age_limit': 0,
762 },
763 'params': {'skip_download': True},
764 }, {
765 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
766 'info_dict': {
767 'id': '1577719286659006464',
768 'title': 'Ultima | #\u0432\u029f\u043c - Test',
769 'description': 'Test https://t.co/Y3KEZD7Dad',
770 'uploader': 'Ultima | #\u0432\u029f\u043c',
771 'uploader_id': 'UltimaShadowX',
772 'uploader_url': 'https://twitter.com/UltimaShadowX',
773 'upload_date': '20221005',
774 'timestamp': 1664992565,
775 'comment_count': int,
776 'repost_count': int,
777 'like_count': int,
778 'tags': [],
779 'age_limit': 0,
780 },
781 'playlist_count': 4,
782 'params': {'skip_download': True},
783 }, {
784 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
785 'info_dict': {
786 'id': '1575559336759263233',
787 'display_id': '1575560063510810624',
788 'ext': 'mp4',
789 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
790 'thumbnail': r're:^https?://.*\.jpg',
791 'description': 'md5:95aea692fda36a12081b9629b02daa92',
792 'uploader': 'Max Olson',
793 'uploader_id': 'MesoMax919',
794 'uploader_url': 'https://twitter.com/MesoMax919',
795 'duration': 21.321,
796 'timestamp': 1664477766,
797 'upload_date': '20220929',
798 'comment_count': int,
799 'repost_count': int,
800 'like_count': int,
801 'view_count': int,
802 'tags': ['HurricaneIan'],
803 'age_limit': 0,
804 },
805 }, {
806 # Adult content, fails if not logged in (GraphQL)
807 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
808 'info_dict': {
809 'id': '1575199163847000068',
810 'display_id': '1575199173472927762',
811 'ext': 'mp4',
812 'title': str,
813 'description': str,
814 'uploader': str,
815 'uploader_id': 'Rizdraws',
816 'uploader_url': 'https://twitter.com/Rizdraws',
817 'upload_date': '20220928',
818 'timestamp': 1664391723,
819 'thumbnail': r're:^https?://.+\.jpg',
820 'like_count': int,
821 'repost_count': int,
822 'comment_count': int,
823 'age_limit': 18,
824 'tags': []
825 },
826 'skip': 'Requires authentication',
827 }, {
828 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
829 'playlist_mincount': 2,
830 'info_dict': {
831 'id': '1395079556562706435',
832 'title': str,
833 'tags': [],
834 'uploader': str,
835 'like_count': int,
836 'upload_date': '20210519',
837 'age_limit': 0,
838 'repost_count': int,
839 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
840 'uploader_id': 'Srirachachau',
841 'comment_count': int,
842 'uploader_url': 'https://twitter.com/Srirachachau',
843 'timestamp': 1621447860,
844 },
845 }, {
846 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
847 'playlist_mincount': 2,
848 'info_dict': {
849 'id': '1578353380363501568',
850 'title': str,
851 'uploader_id': 'DavidToons_',
852 'repost_count': int,
853 'like_count': int,
854 'uploader': str,
855 'timestamp': 1665143744,
856 'uploader_url': 'https://twitter.com/DavidToons_',
857 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
858 'tags': [],
859 'comment_count': int,
860 'upload_date': '20221007',
861 'age_limit': 0,
862 },
863 }, {
864 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
865 'playlist_count': 2,
866 'info_dict': {
867 'id': '1578401165338976258',
868 'title': str,
869 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
870 'uploader': str,
871 'uploader_id': 'primevideouk',
872 'timestamp': 1665155137,
873 'upload_date': '20221007',
874 'age_limit': 0,
875 'uploader_url': 'https://twitter.com/primevideouk',
876 'comment_count': int,
877 'repost_count': int,
878 'like_count': int,
879 'tags': ['TheRingsOfPower'],
880 },
881 }, {
882 # Twitter Spaces
883 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
884 'info_dict': {
885 'id': '1lPJqmBeeNAJb',
886 'ext': 'm4a',
887 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
888 'uploader': r're:Monique Camarra.+?',
889 'uploader_id': 'MoniqueCamarra',
890 'live_status': 'was_live',
891 'release_timestamp': 1658417414,
892 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
893 'timestamp': 1658407771464,
894 },
895 'add_ie': ['TwitterSpaces'],
896 'params': {'skip_download': 'm3u8'},
897 }, {
898 # URL specifies video number but --yes-playlist
899 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
900 'playlist_mincount': 2,
901 'info_dict': {
902 'id': '1600649710662213632',
903 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
904 'timestamp': 1670459604.0,
905 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
906 'comment_count': int,
907 'uploader_id': 'CTVJLaidlaw',
908 'repost_count': int,
909 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
910 'upload_date': '20221208',
911 'age_limit': 0,
912 'uploader': 'Jocelyn Laidlaw',
913 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
914 'like_count': int,
915 },
916 }, {
917 # URL specifies video number and --no-playlist
918 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
919 'info_dict': {
920 'id': '1600649511827013632',
921 'ext': 'mp4',
922 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
923 'thumbnail': r're:^https?://.+\.jpg',
924 'timestamp': 1670459604.0,
925 'uploader_id': 'CTVJLaidlaw',
926 'uploader': 'Jocelyn Laidlaw',
927 'repost_count': int,
928 'comment_count': int,
929 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
930 'duration': 102.226,
931 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
932 'display_id': '1600649710662213632',
933 'like_count': int,
934 'view_count': int,
935 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
936 'upload_date': '20221208',
937 'age_limit': 0,
938 },
939 'params': {'noplaylist': True},
940 }, {
941 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
942 # note the id different between extraction and url
943 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
944 'info_dict': {
945 'id': '1621117577354424321',
946 'display_id': '1621117700482416640',
947 'ext': 'mp4',
948 'title': '뽀 - 아 최우제 이동속도 봐',
949 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
950 'duration': 24.598,
951 'uploader': '뽀',
952 'uploader_id': 's2FAKER',
953 'uploader_url': 'https://twitter.com/s2FAKER',
954 'upload_date': '20230202',
955 'timestamp': 1675339553.0,
956 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
957 'age_limit': 18,
958 'tags': [],
959 'like_count': int,
960 'repost_count': int,
961 'comment_count': int,
962 'view_count': int,
963 },
964 }, {
965 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
966 'info_dict': {
967 'id': '1599108643743473680',
968 'display_id': '1599108751385972737',
969 'ext': 'mp4',
970 'title': '\u06ea - \U0001F48B',
971 'uploader_url': 'https://twitter.com/hlo_again',
972 'like_count': int,
973 'uploader_id': 'hlo_again',
974 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
975 'repost_count': int,
976 'duration': 9.531,
977 'comment_count': int,
978 'view_count': int,
979 'upload_date': '20221203',
980 'age_limit': 0,
981 'timestamp': 1670092210.0,
982 'tags': [],
983 'uploader': '\u06ea',
984 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
985 },
986 'params': {'noplaylist': True},
987 }, {
988 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
989 'info_dict': {
990 'id': '1600009362759733248',
991 'display_id': '1600009574919962625',
992 'ext': 'mp4',
993 'uploader_url': 'https://twitter.com/MunTheShinobi',
994 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
995 'view_count': int,
996 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
997 'age_limit': 0,
998 'uploader': 'Mün The Shinobi',
999 'repost_count': int,
1000 'upload_date': '20221206',
1001 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1002 'comment_count': int,
1003 'like_count': int,
1004 'tags': [],
1005 'uploader_id': 'MunTheShinobi',
1006 'duration': 139.987,
1007 'timestamp': 1670306984.0,
1008 },
1009 }, {
1010 # url to retweet id, legacy API
1011 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1012 'info_dict': {
1013 'id': '1623274794488659969',
1014 'display_id': '1623739803874349067',
1015 'ext': 'mp4',
1016 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1017 'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
1018 'uploader': 'Johnny Bullets',
1019 'uploader_id': 'Johnnybull3ts',
1020 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1021 'age_limit': 0,
1022 'tags': [],
1023 'duration': 8.033,
1024 'timestamp': 1675853859.0,
1025 'upload_date': '20230208',
1026 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1027 'like_count': int,
1028 'repost_count': int,
1029 'comment_count': int,
1030 },
1031 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
1032 }, {
1033 # onion route
1034 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1035 'only_matching': True,
1036 }, {
1037 # Twitch Clip Embed
1038 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1039 'only_matching': True,
1040 }, {
1041 # promo_video_website card
1042 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1043 'only_matching': True,
1044 }, {
1045 # promo_video_convo card
1046 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1047 'only_matching': True,
1048 }, {
1049 # appplayer card
1050 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1051 'only_matching': True,
1052 }, {
1053 # video_direct_message card
1054 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1055 'only_matching': True,
1056 }, {
1057 # poll2choice_video card
1058 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1059 'only_matching': True,
1060 }, {
1061 # poll3choice_video card
1062 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1063 'only_matching': True,
1064 }, {
1065 # poll4choice_video card
1066 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1067 'only_matching': True,
1068 }]
1069
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet-detail response into a legacy-style status dict.

    Looks up the timeline entry whose ``entryId`` is ``tweet-<twid>`` and
    returns its ``legacy`` payload, augmented with the ``user``, ``card`` and
    ``quoted_status`` sub-objects found elsewhere in the result.

    Raises ExtractorError (or a login-required error for adult content) when
    the matched entry is a tombstone.
    """
    wanted_entry = f'tweet-{twid}'
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == wanted_entry, 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    )
    tweet = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    typename = tweet.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet:
        tombstone_text = traverse_obj(tweet, ('tombstone', 'text', 'text', {str}))
        reason = remove_end(tombstone_text, '. Learn more')
        if reason and 'adult content' in reason:
            self.raise_login_required(reason)
        raise ExtractorError(f'Twitter API says: {reason or "Unknown error"}', expected=True)

    # Sub-objects that sit outside of 'legacy' in the GraphQL result
    extras = traverse_obj(tweet, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})

    legacy_status = tweet.get('legacy', {})
    legacy_status.update(extras)

    # extra transformation is needed since result does not match legacy format:
    # the card's binding_values arrive as a list of key/value items, while the
    # rest of the extractor looks them up by key
    bindings_by_key = {}
    for item in traverse_obj(legacy_status, ('card', 'binding_values', ..., {dict})):
        bindings_by_key[item.get('key')] = item.get('value')
    if bindings_by_key:
        legacy_status['card']['binding_values'] = bindings_by_key

    return legacy_status
1102
1103 def _build_graphql_query(self, media_id):
1104 return {
1105 'variables': {
1106 'focalTweetId': media_id,
1107 'includePromotedContent': True,
1108 'with_rux_injections': False,
1109 'withBirdwatchNotes': True,
1110 'withCommunity': True,
1111 'withDownvotePerspective': False,
1112 'withQuickPromoteEligibilityTweetFields': True,
1113 'withReactionsMetadata': False,
1114 'withReactionsPerspective': False,
1115 'withSuperFollowsTweetFields': True,
1116 'withSuperFollowsUserFields': True,
1117 'withV2Timeline': True,
1118 'withVoice': True,
1119 },
1120 'features': {
1121 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1122 'interactive_text_enabled': True,
1123 'responsive_web_edit_tweet_api_enabled': True,
1124 'responsive_web_enhance_cards_enabled': True,
1125 'responsive_web_graphql_timeline_navigation_enabled': False,
1126 'responsive_web_text_conversations_enabled': False,
1127 'responsive_web_uc_gql_enabled': True,
1128 'standardized_nudges_misinfo': True,
1129 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1130 'tweetypie_unmention_optimization_enabled': True,
1131 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1132 'verified_phone_label_enabled': False,
1133 'vibe_api_enabled': True,
1134 },
1135 }
1136
def _real_extract(self, url):
    """Extract the video(s) attached to, or linked from, a tweet.

    The status is fetched through the legacy REST endpoint when the
    ``legacy_api`` extractor argument is set and the session is not logged
    in, and through GraphQL otherwise.

    Returns a single info dict, a URL result pointing at the tweet's first
    expanded URL, or a playlist of all entries found.

    Raises ExtractorError when a URL-specified media index does not exist or
    does not refer to a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self._configuration_arg('legacy_api') and not self.is_logged_in:
        # For retweets, prefer the embedded original status
        status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }), 'retweeted_status', None)
    else:
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    title = description = status['full_text'].replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry produced from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-media part of an info dict from a media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield partial info dicts (or url results) for a tweet card; yields nothing without a card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the tweet it quotes
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # URL indices are 1-based. Reject anything below 1 explicitly: an index of 0
        # would otherwise become -1 and silently select the *last* media item.
        media_index = int(selected_index) - 1
        desired_obj = None
        if media_index >= 0:
            desired_obj = traverse_obj(status, ('extended_entities', 'media', media_index, {dict}))
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media and no usable card: fall back to the first link in the tweet
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1328
1329
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for amp.twimg.com video pages; formats come from the VMAP URL
    # advertised in the page's meta tags.
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Return the info dict (id, fixed title, formats, thumbnails) for an Amplify page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the 'twitter:<target>:width' / ':height' meta tags as ints (or None)
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list so that a missing stream does not
        # surface as a bare IndexError here
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1385
1386
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for twitter.com/i/broadcasts/<id> pages
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast and its stream source, then return the info dict with formats."""
        broadcast_id = self._match_id(url)

        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        playlist_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in playlist_url:
            self.raise_geo_restricted()

        # The playlist URL's 'type' query parameter (if any) becomes the m3u8 format id
        query = compat_parse_qs(compat_urllib_parse_urlparse(playlist_url).query)
        m3u8_id = query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            playlist_url, broadcast_id, m3u8_id, state, width, height)
        return info
1423
1424
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for twitter.com/i/spaces/<id> audio spaces
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased API space state to the live_status value reported
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for looking up a space by id."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Return the info dict for a space.

        Formats are only looked up when the space is neither upcoming nor in
        the ended-but-not-yet-downloadable state; in those two states the
        absence of formats is reported and the metadata is still returned.

        Raises ExtractorError when the API returns no data for the space.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states yield live_status = None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            # created_at is in milliseconds (like scheduled_start); convert to seconds
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1519
1520
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or 'tco:<id>' pseudo-URLs) to their target URL
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host 't.co' matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL back for further extraction."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # 'tco:<id>' form: rebuild the real t.co URL from the id
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()
        # Strip the interstitial warning page prefix, keeping the wrapped link
        unsafe_link_prefix = "https://twitter.com/safety/unsafe_link_warning?unsafe_link="
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url.replace(unsafe_link_prefix, "")
        return self.url_result(new_url)