]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[extractor/twitter:spaces] Fix extraction (#7512)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from .periscope import PeriscopeBaseIE, PeriscopeIE
6 from ..compat import (
7 compat_parse_qs,
8 compat_urllib_parse_unquote,
9 compat_urllib_parse_urlparse,
10 )
11 from ..utils import (
12 ExtractorError,
13 dict_get,
14 float_or_none,
15 format_field,
16 int_or_none,
17 make_archive_id,
18 remove_end,
19 str_or_none,
20 strip_or_none,
21 traverse_obj,
22 try_call,
23 try_get,
24 unified_timestamp,
25 update_url_query,
26 url_or_none,
27 xpath_text,
28 )
29
30
class TwitterBaseIE(InfoExtractor):
    """Common helpers for the Twitter extractors.

    Provides format extraction from media variants / VMAP documents, the
    username+password login flow, guest-token handling and thin wrappers
    around the legacy (1.1) and GraphQL API endpoints.
    """
    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Cached on the instance by _fetch_guest_token() / _call_login_api()
    _guest_token = None
    _flow_token = None

    # Request body sent with the first onboarding/task.json call of the login flow
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Return `(formats, subtitles)` for a single media variant.

        `variant` is a mapping that is read for 'url' and for
        'bitrate'/'bit_rate'. A variant without a URL yields `([], {})`;
        a URL containing '.m3u8' is expanded as an HLS manifest (non-fatal);
        anything else becomes one direct HTTP format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # NOTE(review): the 1000 is presumably int_or_none's scale (bps -> kbps) - confirm.
            # `or None` turns a zero bitrate into "unknown"
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and return `(formats, subtitles)`.

        Every `videoVariant` element is converted via
        `_extract_variant_formats`; the `MediaFile` URL is added as well
        unless it duplicates one of the variants. An invalid `vmap_url`
        yields `([], {})`.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded in the document; decode it in place
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # A missing/empty MediaFile would produce no formats anyway, so skip it outright
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on `a_format` if `video_url` has a `/<W>x<H>/` path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """True when an `auth_token` cookie exists for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _fetch_guest_token(self, headers, display_id):
        """Request a fresh guest token and cache it in `self._guest_token`.

        Any 'x-guest-token' entry is removed from `headers` (the dict is
        modified in place) before the activation request is sent.

        Raises ExtractorError if the response carries no guest token.
        """
        headers.pop('x-guest-token', None)
        self._guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id,
            'Downloading guest token', data=b'', headers=headers), 'guest_token')
        if not self._guest_token:
            raise ExtractorError('Could not retrieve guest token')

    def _set_base_headers(self):
        """Return a new header dict with the bearer token and, if the `ct0` cookie is set, the CSRF token."""
        headers = self._AUTH.copy()
        csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
        if csrf_token:
            headers['x-csrf-token'] = csrf_token
        return headers

    def _call_login_api(self, note, headers, query=None, data=None):
        """Send one step of the login flow and return the ID of the next subtask.

        The response's flow token is stored in `self._flow_token` for the
        following step. HTTP 400 responses are accepted so that the error
        message in the JSON body can be reported.

        Raises ExtractorError when the API reports an error, the status is
        not 'success', or no subtask ID is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in with `username`/`password` by walking the onboarding subtask flow.

        Does nothing if an auth token cookie is already present. Otherwise
        subtasks returned by the API are answered one after another until the
        auth token cookie appears.

        Raises ExtractorError on an unrecognized subtask, or if the flow
        reports success without the auth token cookie having been set.
        """
        if self.is_logged_in:
            return

        self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
        headers = self._set_base_headers()
        self._fetch_guest_token(headers, None)
        headers.update({
            'content-type': 'application/json',
            'x-guest-token': self._guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        })

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token from the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by all free-text subtasks
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'LoginSuccessSubtask':
                # Reaching this inside the loop means the cookie is still missing
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or GraphQL API and return the decoded JSON.

        When not logged in, a guest token is fetched on demand and sent with
        the request. If the first attempt fails with a "bad guest token"
        error, the token is discarded and the request is retried once.

        Raises ExtractorError (expected) if the response contains errors.
        """
        headers = self._set_base_headers()
        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        for first_attempt in (True, False):
            if not self.is_logged_in:
                if not self._guest_token:
                    self._fetch_guest_token(headers, video_id)
                headers['x-guest-token'] = self._guest_token

            allowed_status = {400, 401, 403, 404} if graphql else {403}
            result = self._download_json(
                (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
                video_id, headers=headers, query=query or {}, expected_status=allowed_status,
                note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

            if result.get('errors'):
                # dict.fromkeys de-duplicates while keeping the API's message order,
                # so the reported error text is the same on every run
                errors = ', '.join(dict.fromkeys(
                    traverse_obj(result, ('errors', ..., 'message', {str}))))
                if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
                    self.to_screen('Guest token has expired. Refreshing guest token')
                    self._guest_token = None
                    continue

                raise ExtractorError(
                    f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

            return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for `media_id`; subclasses using GraphQL must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL `endpoint` for `media_id` and return the response's 'data' member.

        Each value produced by `_build_graphql_query` is JSON-encoded before
        being sent as a query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
327
328
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card / embedded-video URLs.

    It only resolves the numeric ID and hands the matching status URL over
    to the regular tweet extractor.
    """
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect the card URL to the corresponding status URL, handled by TwitterIE."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
453
454
455 class TwitterIE(TwitterBaseIE):
456 IE_NAME = 'twitter'
457 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
458
459 _TESTS = [{
460 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
461 'info_dict': {
462 'id': '643211870443208704',
463 'display_id': '643211948184596480',
464 'ext': 'mp4',
465 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
466 'thumbnail': r're:^https?://.*\.jpg',
467 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
468 'uploader': 'FREE THE NIPPLE',
469 'uploader_id': 'freethenipple',
470 'duration': 12.922,
471 'timestamp': 1442188653,
472 'upload_date': '20150913',
473 'uploader_url': 'https://twitter.com/freethenipple',
474 'comment_count': int,
475 'repost_count': int,
476 'like_count': int,
477 'view_count': int,
478 'tags': [],
479 'age_limit': 18,
480 },
481 }, {
482 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
483 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
484 'info_dict': {
485 'id': '657991469417025536',
486 'ext': 'mp4',
487 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
488 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
489 'thumbnail': r're:^https?://.*\.png',
490 'uploader': 'Gifs',
491 'uploader_id': 'giphz',
492 },
493 'expected_warnings': ['height', 'width'],
494 'skip': 'Account suspended',
495 }, {
496 'url': 'https://twitter.com/starwars/status/665052190608723968',
497 'info_dict': {
498 'id': '665052190608723968',
499 'display_id': '665052190608723968',
500 'ext': 'mp4',
501 'title': r're:Star Wars.*A new beginning is coming December 18.*',
502 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
503 'uploader_id': 'starwars',
504 'uploader': r're:Star Wars.*',
505 'timestamp': 1447395772,
506 'upload_date': '20151113',
507 'uploader_url': 'https://twitter.com/starwars',
508 'comment_count': int,
509 'repost_count': int,
510 'like_count': int,
511 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
512 'age_limit': 0,
513 },
514 }, {
515 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
516 'info_dict': {
517 'id': '705235433198714880',
518 'ext': 'mp4',
519 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
520 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
521 'uploader_id': 'BTNBrentYarina',
522 'uploader': 'Brent Yarina',
523 'timestamp': 1456976204,
524 'upload_date': '20160303',
525 'uploader_url': 'https://twitter.com/BTNBrentYarina',
526 'comment_count': int,
527 'repost_count': int,
528 'like_count': int,
529 'tags': [],
530 'age_limit': 0,
531 },
532 'params': {
533 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
534 # Test case of TwitterCardIE
535 'skip_download': True,
536 },
537 'skip': 'Dead external link',
538 }, {
539 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
540 'info_dict': {
541 'id': '700207414000242688',
542 'display_id': '700207533655363584',
543 'ext': 'mp4',
544 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
545 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
546 'thumbnail': r're:^https?://.*\.jpg',
547 'uploader': 'jaydin donte geer',
548 'uploader_id': 'jaydingeer',
549 'duration': 30.0,
550 'timestamp': 1455777459,
551 'upload_date': '20160218',
552 'uploader_url': 'https://twitter.com/jaydingeer',
553 'comment_count': int,
554 'repost_count': int,
555 'like_count': int,
556 'view_count': int,
557 'tags': ['Damndaniel'],
558 'age_limit': 0,
559 },
560 }, {
561 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
562 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
563 'info_dict': {
564 'id': 'MIOxnrUteUd',
565 'ext': 'mp4',
566 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
567 'uploader': 'TAKUMA',
568 'uploader_id': '1004126642786242560',
569 'timestamp': 1402826626,
570 'upload_date': '20140615',
571 'thumbnail': r're:^https?://.*\.jpg',
572 'alt_title': 'Vine by TAKUMA',
573 'comment_count': int,
574 'repost_count': int,
575 'like_count': int,
576 'view_count': int,
577 },
578 'add_ie': ['Vine'],
579 }, {
580 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
581 'info_dict': {
582 'id': '717462543795523584',
583 'display_id': '719944021058060289',
584 'ext': 'mp4',
585 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
586 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
587 'uploader_id': 'CaptainAmerica',
588 'uploader': 'Captain America',
589 'duration': 3.17,
590 'timestamp': 1460483005,
591 'upload_date': '20160412',
592 'uploader_url': 'https://twitter.com/CaptainAmerica',
593 'thumbnail': r're:^https?://.*\.jpg',
594 'comment_count': int,
595 'repost_count': int,
596 'like_count': int,
597 'view_count': int,
598 'tags': [],
599 'age_limit': 0,
600 },
601 }, {
602 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
603 'info_dict': {
604 'id': '1zqKVVlkqLaKB',
605 'ext': 'mp4',
606 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
607 'upload_date': '20160923',
608 'uploader_id': '1PmKqpJdOJQoY',
609 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
610 'timestamp': 1474613214,
611 'thumbnail': r're:^https?://.*\.jpg',
612 },
613 'add_ie': ['Periscope'],
614 }, {
615 # has mp4 formats via mobile API
616 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
617 'info_dict': {
618 'id': '852138619213144067',
619 'ext': 'mp4',
620 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
621 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
622 'uploader': 'عالم الأخبار',
623 'uploader_id': 'news_al3alm',
624 'duration': 277.4,
625 'timestamp': 1492000653,
626 'upload_date': '20170412',
627 },
628 'skip': 'Account suspended',
629 }, {
630 'url': 'https://twitter.com/i/web/status/910031516746514432',
631 'info_dict': {
632 'id': '910030238373089285',
633 'display_id': '910031516746514432',
634 'ext': 'mp4',
635 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
636 'thumbnail': r're:^https?://.*\.jpg',
637 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
638 'uploader': 'Préfet de Guadeloupe',
639 'uploader_id': 'Prefet971',
640 'duration': 47.48,
641 'timestamp': 1505803395,
642 'upload_date': '20170919',
643 'uploader_url': 'https://twitter.com/Prefet971',
644 'comment_count': int,
645 'repost_count': int,
646 'like_count': int,
647 'view_count': int,
648 'tags': ['Maria'],
649 'age_limit': 0,
650 },
651 'params': {
652 'skip_download': True, # requires ffmpeg
653 },
654 }, {
655 # card via api.twitter.com/1.1/videos/tweet/config
656 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
657 'info_dict': {
658 'id': '1001551417340022785',
659 'display_id': '1001551623938805763',
660 'ext': 'mp4',
661 'title': 're:.*?Shep is on a roll today.*?',
662 'thumbnail': r're:^https?://.*\.jpg',
663 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
664 'uploader': 'Lis Power',
665 'uploader_id': 'LisPower1',
666 'duration': 111.278,
667 'timestamp': 1527623489,
668 'upload_date': '20180529',
669 'uploader_url': 'https://twitter.com/LisPower1',
670 'comment_count': int,
671 'repost_count': int,
672 'like_count': int,
673 'view_count': int,
674 'tags': [],
675 'age_limit': 0,
676 },
677 'params': {
678 'skip_download': True, # requires ffmpeg
679 },
680 }, {
681 'url': 'https://twitter.com/foobar/status/1087791357756956680',
682 'info_dict': {
683 'id': '1087791272830607360',
684 'display_id': '1087791357756956680',
685 'ext': 'mp4',
686 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
687 'thumbnail': r're:^https?://.*\.jpg',
688 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
689 'uploader': 'Twitter',
690 'uploader_id': 'Twitter',
691 'duration': 61.567,
692 'timestamp': 1548184644,
693 'upload_date': '20190122',
694 'uploader_url': 'https://twitter.com/Twitter',
695 'comment_count': int,
696 'repost_count': int,
697 'like_count': int,
698 'view_count': int,
699 'tags': [],
700 'age_limit': 0,
701 },
702 }, {
703 # not available in Periscope
704 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
705 'info_dict': {
706 'id': '1vOGwqejwoWxB',
707 'ext': 'mp4',
708 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
709 'uploader': 'Vivi',
710 'uploader_id': '1eVjYOLGkGrQL',
711 'thumbnail': r're:^https?://.*\.jpg',
712 'tags': ['EduTECH2019'],
713 'view_count': int,
714 },
715 'add_ie': ['TwitterBroadcast'],
716 }, {
717 # unified card
718 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
719 'info_dict': {
720 'id': '1349774757969989634',
721 'display_id': '1349794411333394432',
722 'ext': 'mp4',
723 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
724 'thumbnail': r're:^https?://.*\.jpg',
725 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
726 'uploader': 'Brooklyn Nets',
727 'uploader_id': 'BrooklynNets',
728 'duration': 324.484,
729 'timestamp': 1610651040,
730 'upload_date': '20210114',
731 'uploader_url': 'https://twitter.com/BrooklynNets',
732 'comment_count': int,
733 'repost_count': int,
734 'like_count': int,
735 'tags': [],
736 'age_limit': 0,
737 },
738 'params': {
739 'skip_download': True,
740 },
741 }, {
742 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
743 'info_dict': {
744 'id': '1577855447914409984',
745 'display_id': '1577855540407197696',
746 'ext': 'mp4',
747 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
748 'description': 'md5:b9c3699335447391d11753ab21c70a74',
749 'upload_date': '20221006',
750 'uploader': 'oshtru',
751 'uploader_id': 'oshtru',
752 'uploader_url': 'https://twitter.com/oshtru',
753 'thumbnail': r're:^https?://.*\.jpg',
754 'duration': 30.03,
755 'timestamp': 1665025050,
756 'comment_count': int,
757 'repost_count': int,
758 'like_count': int,
759 'view_count': int,
760 'tags': [],
761 'age_limit': 0,
762 },
763 'params': {'skip_download': True},
764 }, {
765 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
766 'info_dict': {
767 'id': '1577719286659006464',
768 'title': 'Ultima | #\u0432\u029f\u043c - Test',
769 'description': 'Test https://t.co/Y3KEZD7Dad',
770 'uploader': 'Ultima | #\u0432\u029f\u043c',
771 'uploader_id': 'UltimaShadowX',
772 'uploader_url': 'https://twitter.com/UltimaShadowX',
773 'upload_date': '20221005',
774 'timestamp': 1664992565,
775 'comment_count': int,
776 'repost_count': int,
777 'like_count': int,
778 'tags': [],
779 'age_limit': 0,
780 },
781 'playlist_count': 4,
782 'params': {'skip_download': True},
783 }, {
784 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
785 'info_dict': {
786 'id': '1575559336759263233',
787 'display_id': '1575560063510810624',
788 'ext': 'mp4',
789 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
790 'thumbnail': r're:^https?://.*\.jpg',
791 'description': 'md5:95aea692fda36a12081b9629b02daa92',
792 'uploader': 'Max Olson',
793 'uploader_id': 'MesoMax919',
794 'uploader_url': 'https://twitter.com/MesoMax919',
795 'duration': 21.321,
796 'timestamp': 1664477766,
797 'upload_date': '20220929',
798 'comment_count': int,
799 'repost_count': int,
800 'like_count': int,
801 'view_count': int,
802 'tags': ['HurricaneIan'],
803 'age_limit': 0,
804 },
805 }, {
806 # Adult content, fails if not logged in (GraphQL)
807 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
808 'info_dict': {
809 'id': '1575199163847000068',
810 'display_id': '1575199173472927762',
811 'ext': 'mp4',
812 'title': str,
813 'description': str,
814 'uploader': str,
815 'uploader_id': 'Rizdraws',
816 'uploader_url': 'https://twitter.com/Rizdraws',
817 'upload_date': '20220928',
818 'timestamp': 1664391723,
819 'thumbnail': r're:^https?://.+\.jpg',
820 'like_count': int,
821 'repost_count': int,
822 'comment_count': int,
823 'age_limit': 18,
824 'tags': []
825 },
826 'skip': 'Requires authentication',
827 }, {
828 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
829 'playlist_mincount': 2,
830 'info_dict': {
831 'id': '1395079556562706435',
832 'title': str,
833 'tags': [],
834 'uploader': str,
835 'like_count': int,
836 'upload_date': '20210519',
837 'age_limit': 0,
838 'repost_count': int,
839 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
840 'uploader_id': 'Srirachachau',
841 'comment_count': int,
842 'uploader_url': 'https://twitter.com/Srirachachau',
843 'timestamp': 1621447860,
844 },
845 }, {
846 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
847 'playlist_mincount': 2,
848 'info_dict': {
849 'id': '1578353380363501568',
850 'title': str,
851 'uploader_id': 'DavidToons_',
852 'repost_count': int,
853 'like_count': int,
854 'uploader': str,
855 'timestamp': 1665143744,
856 'uploader_url': 'https://twitter.com/DavidToons_',
857 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
858 'tags': [],
859 'comment_count': int,
860 'upload_date': '20221007',
861 'age_limit': 0,
862 },
863 }, {
864 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
865 'playlist_count': 2,
866 'info_dict': {
867 'id': '1578401165338976258',
868 'title': str,
869 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
870 'uploader': str,
871 'uploader_id': 'primevideouk',
872 'timestamp': 1665155137,
873 'upload_date': '20221007',
874 'age_limit': 0,
875 'uploader_url': 'https://twitter.com/primevideouk',
876 'comment_count': int,
877 'repost_count': int,
878 'like_count': int,
879 'tags': ['TheRingsOfPower'],
880 },
881 }, {
882 # Twitter Spaces
883 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
884 'info_dict': {
885 'id': '1lPJqmBeeNAJb',
886 'ext': 'm4a',
887 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
888 'uploader': r're:Monique Camarra.+?',
889 'uploader_id': 'MoniqueCamarra',
890 'live_status': 'was_live',
891 'release_timestamp': 1658417414,
892 'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
893 'timestamp': 1658407771,
894 'release_date': '20220721',
895 'upload_date': '20220721',
896 },
897 'add_ie': ['TwitterSpaces'],
898 'params': {'skip_download': 'm3u8'},
899 }, {
900 # URL specifies video number but --yes-playlist
901 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
902 'playlist_mincount': 2,
903 'info_dict': {
904 'id': '1600649710662213632',
905 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
906 'timestamp': 1670459604.0,
907 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
908 'comment_count': int,
909 'uploader_id': 'CTVJLaidlaw',
910 'repost_count': int,
911 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
912 'upload_date': '20221208',
913 'age_limit': 0,
914 'uploader': 'Jocelyn Laidlaw',
915 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
916 'like_count': int,
917 },
918 }, {
919 # URL specifies video number and --no-playlist
920 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
921 'info_dict': {
922 'id': '1600649511827013632',
923 'ext': 'mp4',
924 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
925 'thumbnail': r're:^https?://.+\.jpg',
926 'timestamp': 1670459604.0,
927 'uploader_id': 'CTVJLaidlaw',
928 'uploader': 'Jocelyn Laidlaw',
929 'repost_count': int,
930 'comment_count': int,
931 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
932 'duration': 102.226,
933 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
934 'display_id': '1600649710662213632',
935 'like_count': int,
936 'view_count': int,
937 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
938 'upload_date': '20221208',
939 'age_limit': 0,
940 },
941 'params': {'noplaylist': True},
942 }, {
943 # id pointing to a TweetWithVisibilityResults type entity, which wraps the actual Tweet
944 # note that the extracted id differs from the id in the url
945 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
946 'info_dict': {
947 'id': '1621117577354424321',
948 'display_id': '1621117700482416640',
949 'ext': 'mp4',
950 'title': '뽀 - 아 최우제 이동속도 봐',
951 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
952 'duration': 24.598,
953 'uploader': '뽀',
954 'uploader_id': 's2FAKER',
955 'uploader_url': 'https://twitter.com/s2FAKER',
956 'upload_date': '20230202',
957 'timestamp': 1675339553.0,
958 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
959 'age_limit': 18,
960 'tags': [],
961 'like_count': int,
962 'repost_count': int,
963 'comment_count': int,
964 'view_count': int,
965 },
966 }, {
967 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
968 'info_dict': {
969 'id': '1599108643743473680',
970 'display_id': '1599108751385972737',
971 'ext': 'mp4',
972 'title': '\u06ea - \U0001F48B',
973 'uploader_url': 'https://twitter.com/hlo_again',
974 'like_count': int,
975 'uploader_id': 'hlo_again',
976 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
977 'repost_count': int,
978 'duration': 9.531,
979 'comment_count': int,
980 'view_count': int,
981 'upload_date': '20221203',
982 'age_limit': 0,
983 'timestamp': 1670092210.0,
984 'tags': [],
985 'uploader': '\u06ea',
986 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
987 },
988 'params': {'noplaylist': True},
989 }, {
990 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
991 'info_dict': {
992 'id': '1600009362759733248',
993 'display_id': '1600009574919962625',
994 'ext': 'mp4',
995 'uploader_url': 'https://twitter.com/MunTheShinobi',
996 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
997 'view_count': int,
998 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
999 'age_limit': 0,
1000 'uploader': 'Mün The Shinobi',
1001 'repost_count': int,
1002 'upload_date': '20221206',
1003 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1004 'comment_count': int,
1005 'like_count': int,
1006 'tags': [],
1007 'uploader_id': 'MunTheShinobi',
1008 'duration': 139.987,
1009 'timestamp': 1670306984.0,
1010 },
1011 }, {
1012 # url to retweet id, legacy API
1013 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1014 'info_dict': {
1015 'id': '1623274794488659969',
1016 'display_id': '1623739803874349067',
1017 'ext': 'mp4',
1018 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1019 'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
1020 'uploader': 'Johnny Bullets',
1021 'uploader_id': 'Johnnybull3ts',
1022 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1023 'age_limit': 0,
1024 'tags': [],
1025 'duration': 8.033,
1026 'timestamp': 1675853859.0,
1027 'upload_date': '20230208',
1028 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1029 'like_count': int,
1030 'repost_count': int,
1031 'comment_count': int,
1032 },
1033 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
1034 }, {
1035 # onion route
1036 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1037 'only_matching': True,
1038 }, {
1039 # Twitch Clip Embed
1040 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1041 'only_matching': True,
1042 }, {
1043 # promo_video_website card
1044 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1045 'only_matching': True,
1046 }, {
1047 # promo_video_convo card
1048 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1049 'only_matching': True,
1050 }, {
1051 # appplayer card
1052 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1053 'only_matching': True,
1054 }, {
1055 # video_direct_message card
1056 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1057 'only_matching': True,
1058 }, {
1059 # poll2choice_video card
1060 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1061 'only_matching': True,
1062 }, {
1063 # poll3choice_video card
1064 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1065 'only_matching': True,
1066 }, {
1067 # poll4choice_video card
1068 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1069 'only_matching': True,
1070 }]
1071
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL TweetDetail payload into a legacy-style status dict.

    The entry whose ``entryId`` is ``tweet-<twid>`` is looked up in the
    threaded conversation; if the result carries a ``tweet`` sub-object,
    that one is preferred over the result itself.

    Raises ExtractorError when the result contains a tombstone; when the
    tombstone text mentions adult content, ``raise_login_required`` is
    called first.
    """
    wanted_entry_id = f'tweet-{twid}'
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == wanted_entry_id, 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    )
    tweet = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    typename = tweet.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet:
        tombstone_text = traverse_obj(tweet, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        if cause and 'adult content' in cause:
            self.raise_login_required(cause)
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    # Start from the tweet's own legacy dict and graft the nested legacy dicts onto it
    status = tweet.get('legacy', {})
    nested_legacy = traverse_obj(tweet, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_legacy)

    # extra transformation is needed since result does not match legacy format:
    # the card's list of {key, value} items is turned into a key -> value mapping
    card_bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
1104
1105 def _build_graphql_query(self, media_id):
1106 return {
1107 'variables': {
1108 'focalTweetId': media_id,
1109 'includePromotedContent': True,
1110 'with_rux_injections': False,
1111 'withBirdwatchNotes': True,
1112 'withCommunity': True,
1113 'withDownvotePerspective': False,
1114 'withQuickPromoteEligibilityTweetFields': True,
1115 'withReactionsMetadata': False,
1116 'withReactionsPerspective': False,
1117 'withSuperFollowsTweetFields': True,
1118 'withSuperFollowsUserFields': True,
1119 'withV2Timeline': True,
1120 'withVoice': True,
1121 },
1122 'features': {
1123 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1124 'interactive_text_enabled': True,
1125 'responsive_web_edit_tweet_api_enabled': True,
1126 'responsive_web_enhance_cards_enabled': True,
1127 'responsive_web_graphql_timeline_navigation_enabled': False,
1128 'responsive_web_text_conversations_enabled': False,
1129 'responsive_web_uc_gql_enabled': True,
1130 'standardized_nudges_misinfo': True,
1131 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1132 'tweetypie_unmention_optimization_enabled': True,
1133 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1134 'verified_phone_label_enabled': False,
1135 'vibe_api_enabled': True,
1136 },
1137 }
1138
def _real_extract(self, url):
    """Extract the media attached to, or linked from, a single tweet.

    The tweet status is fetched through the legacy REST endpoint when the
    ``legacy_api`` extractor argument is set and no login is active, and
    through the GraphQL TweetDetail endpoint otherwise. Depending on what
    is found, the return value is a single entry, a playlist of entries,
    a ``url_result`` pointing at the tweet's first expanded URL, or the
    bare metadata dict when nothing playable was found.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self._configuration_arg('legacy_api') and not self.is_logged_in:
        # presumably yields the retweeted status when there is one and the
        # whole response otherwise — verify against traverse_obj semantics
        status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }), 'retweeted_status', None)
    else:
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    # NOTE(review): plain indexing raises KeyError when the status has no
    # 'full_text' (e.g. an empty dict from _graphql_to_legacy) — confirm
    title = description = status['full_text'].replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry produced from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build an entry dict (id, formats, subtitles, thumbnails, ...) from one media object."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                # One thumbnail per size name; the name is set as the
                # 'name' query parameter of the media URL
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entry dicts (or url references) described by the tweet's card, if any."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding names its own type; the payload is read from the
            # matching '<type>_value' key (type lower-cased)
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        # Only the part of the card name after the last ':' selects the handler
        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            # The unified card is a JSON document whose media entities are
            # handled like regular tweet media
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                # Skip missing images and placeholder images
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of its quoted status
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    # presumably true unless the URL names a video number and --no-playlist
    # is in effect — verify against _yes_playlist
    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The index is converted to 0-based and looked up in the tweet's own media
        # list (quoted status media is not considered here).
        # NOTE(review): an index of 0 would become -1 — confirm it cannot
        # silently select the last media item
        desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    # Entry-specific fields override the shared metadata; the tweet id is kept as display_id
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media found: fall back to the first URL embedded in the tweet,
        # unless there is none or it is the URL being extracted
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    # Only the first entry also gets an archive id derived from the tweet id
    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    # Several entries: number the titles
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1330
1331
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Amplify video pages hosted on amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Return the info dict for an Amplify page.

        Formats come from the VMAP URL advertised in the page's
        ``twitter:amplify:vmap`` meta tag; the thumbnail and the player
        dimensions are read from the other ``twitter:*`` meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the (width, height) meta tags for the given target ('image' or 'player')
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list: indexing formats[0] unconditionally
        # would raise IndexError instead of letting the caller report missing formats
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1387
1388
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> pages."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict with formats."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The format id is taken from the 'type' query parameter of the playlist URL, if present
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1425
1426
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for twitter.com/i/spaces/<id> audio spaces."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased space state reported by the API to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for an AudioSpaceById query."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Return the info dict for a space; formats stay empty when nothing is playable."""
        space_id = self._match_id(url)
        api_response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = api_response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', live=is_live, fatal=False,
                    headers={'Referer': 'https://twitter.com/'})
            # Every format is marked as AAC audio without video
            for fmt in formats:
                fmt.update(vcodec='none', acodec='aac')
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1529
1530
class TwitterShortenerIE(TwitterBaseIE):
    """Resolver for t.co short links (and the internal ``tco:<id>`` form)."""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link's redirect and hand the target URL to another extractor."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # 'tco:<id>' form: build the real short URL from the id
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading warning-page prefix and keep the wrapped link as-is
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)