]> jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/twitter.py
`FFmpegFixupM3u8PP` may need to run with ffmpeg
[yt-dlp.git] / yt_dlp / extractor / twitter.py
... / ...
CommitLineData
1import json
2import re
3
4from .common import InfoExtractor
5from .periscope import PeriscopeBaseIE, PeriscopeIE
6from ..compat import (
7 compat_parse_qs,
8 compat_urllib_parse_unquote,
9 compat_urllib_parse_urlparse,
10)
11from ..utils import (
12 ExtractorError,
13 dict_get,
14 filter_dict,
15 float_or_none,
16 format_field,
17 int_or_none,
18 make_archive_id,
19 remove_end,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
29)
30
31
class TwitterBaseIE(InfoExtractor):
    """Common plumbing for the Twitter extractors.

    Holds the API endpoints and bearer tokens, the guest-token and login
    flows, and helpers that turn Twitter video variants / VMAP documents
    into format lists.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Flow token returned by the most recent onboarding/task.json call;
    # it is echoed back in every subsequent login request.
    _flow_token = None

    # Request body sent with the first login call (flow_name=login).
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single video variant.

        ``variant`` is a mapping with a ``url`` key and optionally
        ``bitrate`` / ``bit_rate``. Returns a ``(formats, subtitles)``
        pair; both are empty when the variant has no URL.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            # HLS manifest: non-fatal, so a broken playlist does not abort extraction
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # Bitrate is scaled by 1000 (presumably bps -> kbps); 0 is treated as unknown
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and collect its formats.

        Returns a ``(formats, subtitles)`` pair, empty when ``vmap_url``
        is not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is stored percent-encoded; decode it in place
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also pick up the MediaFile URL unless a variant already covered it
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` from a ``/<W>x<H>/`` URL path segment, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an ``auth_token`` cookie exists for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _fetch_guest_token(self, display_id):
        """Request a guest token from ``guest/activate.json`` and return it.

        Raises ExtractorError when the response carries no string
        ``guest_token``.
        """
        guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token', data=b'',
            # The legacy bearer is only considered when a display_id was supplied
            headers=self._set_base_headers(legacy=display_id and self._configuration_arg('legacy_api'))),
            ('guest_token', {str}))
        if not guest_token:
            raise ExtractorError('Could not retrieve guest token')
        return guest_token

    def _set_base_headers(self, legacy=False):
        """Return the base request headers: bearer authorization plus the CSRF token when a ``ct0`` cookie exists.

        The legacy bearer token is used only when ``legacy`` is truthy and
        the session is not logged in.
        """
        bearer_token = self._LEGACY_AUTH if legacy and not self.is_logged_in else self._AUTH
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value),
        })

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow against ``onboarding/task.json``.

        Stores the returned flow token on ``self._flow_token`` and returns
        the ID of the next subtask. Raises ExtractorError when the API
        reports an error, the status is not ``success``, or no subtask ID
        is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            # query defaults to None instead of a shared mutable {}
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering the API's login subtasks until an ``auth_token`` cookie is present.

        Does nothing when already logged in. Raises ExtractorError on an
        unrecognized subtask, or when the flow reports success without the
        auth cookie having been set.
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        # Prefer the guest token embedded in the page; otherwise request one
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._fetch_guest_token(None)
        headers = {
            **self._set_base_headers(),
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        }

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, so each step sends the latest token
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by the free-text subtasks
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'LoginSuccessSubtask':
                # Reached only while still not logged in, i.e. no auth cookie was set
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy (1.1) or GraphQL API and return the parsed JSON.

        Uses session headers when logged in and a freshly fetched guest
        token otherwise. Raises ExtractorError (expected) when the response
        contains a non-empty ``errors`` entry.
        """
        headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api'))
        headers.update({
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
        } if self.is_logged_in else {
            'x-guest-token': self._fetch_guest_token(video_id)
        })
        allowed_status = {400, 401, 403, 404} if graphql else {403}
        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
            # query defaults to None instead of a shared mutable {}
            video_id, headers=headers, query=query or {}, expected_status=allowed_status,
            note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

        if result.get('errors'):
            # set() collapses repeated messages before joining
            errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
            raise ExtractorError(
                f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for ``media_id``; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the ``data`` member of the response."""
        data = self._build_graphql_query(media_id)
        # Each top-level value is sent as its own compact JSON string
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
320
class TwitterCardIE(InfoExtractor):
    """Extractor for card and ``i/videos`` URLs; hands the status ID over to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a URL result pointing at the matching ``/statuses/<id>`` page, to be handled by TwitterIE."""
        status_id = self._match_id(url)
        return self.url_result(
            'https://twitter.com/statuses/' + status_id,
            TwitterIE.ie_key(), status_id)
445
446
447class TwitterIE(TwitterBaseIE):
448 IE_NAME = 'twitter'
449 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
450
451 _TESTS = [{
452 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
453 'info_dict': {
454 'id': '643211870443208704',
455 'display_id': '643211948184596480',
456 'ext': 'mp4',
457 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
458 'thumbnail': r're:^https?://.*\.jpg',
459 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
460 'uploader': 'FREE THE NIPPLE',
461 'uploader_id': 'freethenipple',
462 'duration': 12.922,
463 'timestamp': 1442188653,
464 'upload_date': '20150913',
465 'uploader_url': 'https://twitter.com/freethenipple',
466 'comment_count': int,
467 'repost_count': int,
468 'like_count': int,
469 'view_count': int,
470 'tags': [],
471 'age_limit': 18,
472 },
473 }, {
474 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
475 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
476 'info_dict': {
477 'id': '657991469417025536',
478 'ext': 'mp4',
479 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
480 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
481 'thumbnail': r're:^https?://.*\.png',
482 'uploader': 'Gifs',
483 'uploader_id': 'giphz',
484 },
485 'expected_warnings': ['height', 'width'],
486 'skip': 'Account suspended',
487 }, {
488 'url': 'https://twitter.com/starwars/status/665052190608723968',
489 'info_dict': {
490 'id': '665052190608723968',
491 'display_id': '665052190608723968',
492 'ext': 'mp4',
493 'title': r're:Star Wars.*A new beginning is coming December 18.*',
494 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
495 'uploader_id': 'starwars',
496 'uploader': r're:Star Wars.*',
497 'timestamp': 1447395772,
498 'upload_date': '20151113',
499 'uploader_url': 'https://twitter.com/starwars',
500 'comment_count': int,
501 'repost_count': int,
502 'like_count': int,
503 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
504 'age_limit': 0,
505 },
506 }, {
507 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
508 'info_dict': {
509 'id': '705235433198714880',
510 'ext': 'mp4',
511 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
512 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
513 'uploader_id': 'BTNBrentYarina',
514 'uploader': 'Brent Yarina',
515 'timestamp': 1456976204,
516 'upload_date': '20160303',
517 'uploader_url': 'https://twitter.com/BTNBrentYarina',
518 'comment_count': int,
519 'repost_count': int,
520 'like_count': int,
521 'tags': [],
522 'age_limit': 0,
523 },
524 'params': {
525 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
526 # Test case of TwitterCardIE
527 'skip_download': True,
528 },
529 'skip': 'Dead external link',
530 }, {
531 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
532 'info_dict': {
533 'id': '700207414000242688',
534 'display_id': '700207533655363584',
535 'ext': 'mp4',
536 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
537 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
538 'thumbnail': r're:^https?://.*\.jpg',
539 'uploader': 'jaydin donte geer',
540 'uploader_id': 'jaydingeer',
541 'duration': 30.0,
542 'timestamp': 1455777459,
543 'upload_date': '20160218',
544 'uploader_url': 'https://twitter.com/jaydingeer',
545 'comment_count': int,
546 'repost_count': int,
547 'like_count': int,
548 'view_count': int,
549 'tags': ['Damndaniel'],
550 'age_limit': 0,
551 },
552 }, {
553 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
554 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
555 'info_dict': {
556 'id': 'MIOxnrUteUd',
557 'ext': 'mp4',
558 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
559 'uploader': 'TAKUMA',
560 'uploader_id': '1004126642786242560',
561 'timestamp': 1402826626,
562 'upload_date': '20140615',
563 'thumbnail': r're:^https?://.*\.jpg',
564 'alt_title': 'Vine by TAKUMA',
565 'comment_count': int,
566 'repost_count': int,
567 'like_count': int,
568 'view_count': int,
569 },
570 'add_ie': ['Vine'],
571 }, {
572 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
573 'info_dict': {
574 'id': '717462543795523584',
575 'display_id': '719944021058060289',
576 'ext': 'mp4',
577 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
578 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
579 'uploader_id': 'CaptainAmerica',
580 'uploader': 'Captain America',
581 'duration': 3.17,
582 'timestamp': 1460483005,
583 'upload_date': '20160412',
584 'uploader_url': 'https://twitter.com/CaptainAmerica',
585 'thumbnail': r're:^https?://.*\.jpg',
586 'comment_count': int,
587 'repost_count': int,
588 'like_count': int,
589 'view_count': int,
590 'tags': [],
591 'age_limit': 0,
592 },
593 }, {
594 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
595 'info_dict': {
596 'id': '1zqKVVlkqLaKB',
597 'ext': 'mp4',
598 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
599 'upload_date': '20160923',
600 'uploader_id': '1PmKqpJdOJQoY',
601 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
602 'timestamp': 1474613214,
603 'thumbnail': r're:^https?://.*\.jpg',
604 },
605 'add_ie': ['Periscope'],
606 }, {
607 # has mp4 formats via mobile API
608 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
609 'info_dict': {
610 'id': '852138619213144067',
611 'ext': 'mp4',
612 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
613 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
614 'uploader': 'عالم الأخبار',
615 'uploader_id': 'news_al3alm',
616 'duration': 277.4,
617 'timestamp': 1492000653,
618 'upload_date': '20170412',
619 },
620 'skip': 'Account suspended',
621 }, {
622 'url': 'https://twitter.com/i/web/status/910031516746514432',
623 'info_dict': {
624 'id': '910030238373089285',
625 'display_id': '910031516746514432',
626 'ext': 'mp4',
627 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
628 'thumbnail': r're:^https?://.*\.jpg',
629 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
630 'uploader': 'Préfet de Guadeloupe',
631 'uploader_id': 'Prefet971',
632 'duration': 47.48,
633 'timestamp': 1505803395,
634 'upload_date': '20170919',
635 'uploader_url': 'https://twitter.com/Prefet971',
636 'comment_count': int,
637 'repost_count': int,
638 'like_count': int,
639 'view_count': int,
640 'tags': ['Maria'],
641 'age_limit': 0,
642 },
643 'params': {
644 'skip_download': True, # requires ffmpeg
645 },
646 }, {
647 # card via api.twitter.com/1.1/videos/tweet/config
648 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
649 'info_dict': {
650 'id': '1001551417340022785',
651 'display_id': '1001551623938805763',
652 'ext': 'mp4',
653 'title': 're:.*?Shep is on a roll today.*?',
654 'thumbnail': r're:^https?://.*\.jpg',
655 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
656 'uploader': 'Lis Power',
657 'uploader_id': 'LisPower1',
658 'duration': 111.278,
659 'timestamp': 1527623489,
660 'upload_date': '20180529',
661 'uploader_url': 'https://twitter.com/LisPower1',
662 'comment_count': int,
663 'repost_count': int,
664 'like_count': int,
665 'view_count': int,
666 'tags': [],
667 'age_limit': 0,
668 },
669 'params': {
670 'skip_download': True, # requires ffmpeg
671 },
672 }, {
673 'url': 'https://twitter.com/foobar/status/1087791357756956680',
674 'info_dict': {
675 'id': '1087791272830607360',
676 'display_id': '1087791357756956680',
677 'ext': 'mp4',
678 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
679 'thumbnail': r're:^https?://.*\.jpg',
680 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
681 'uploader': 'Twitter',
682 'uploader_id': 'Twitter',
683 'duration': 61.567,
684 'timestamp': 1548184644,
685 'upload_date': '20190122',
686 'uploader_url': 'https://twitter.com/Twitter',
687 'comment_count': int,
688 'repost_count': int,
689 'like_count': int,
690 'view_count': int,
691 'tags': [],
692 'age_limit': 0,
693 },
694 }, {
695 # not available in Periscope
696 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
697 'info_dict': {
698 'id': '1vOGwqejwoWxB',
699 'ext': 'mp4',
700 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
701 'uploader': 'Vivi',
702 'uploader_id': '1eVjYOLGkGrQL',
703 'thumbnail': r're:^https?://.*\.jpg',
704 'tags': ['EduTECH2019'],
705 'view_count': int,
706 },
707 'add_ie': ['TwitterBroadcast'],
708 }, {
709 # unified card
710 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
711 'info_dict': {
712 'id': '1349774757969989634',
713 'display_id': '1349794411333394432',
714 'ext': 'mp4',
715 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
716 'thumbnail': r're:^https?://.*\.jpg',
717 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
718 'uploader': 'Brooklyn Nets',
719 'uploader_id': 'BrooklynNets',
720 'duration': 324.484,
721 'timestamp': 1610651040,
722 'upload_date': '20210114',
723 'uploader_url': 'https://twitter.com/BrooklynNets',
724 'comment_count': int,
725 'repost_count': int,
726 'like_count': int,
727 'tags': [],
728 'age_limit': 0,
729 },
730 'params': {
731 'skip_download': True,
732 },
733 }, {
734 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
735 'info_dict': {
736 'id': '1577855447914409984',
737 'display_id': '1577855540407197696',
738 'ext': 'mp4',
739 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
740 'description': 'md5:b9c3699335447391d11753ab21c70a74',
741 'upload_date': '20221006',
742 'uploader': 'oshtru',
743 'uploader_id': 'oshtru',
744 'uploader_url': 'https://twitter.com/oshtru',
745 'thumbnail': r're:^https?://.*\.jpg',
746 'duration': 30.03,
747 'timestamp': 1665025050,
748 'comment_count': int,
749 'repost_count': int,
750 'like_count': int,
751 'view_count': int,
752 'tags': [],
753 'age_limit': 0,
754 },
755 'params': {'skip_download': True},
756 }, {
757 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
758 'info_dict': {
759 'id': '1577719286659006464',
760 'title': 'Ultima📛 | #вʟм - Test',
761 'description': 'Test https://t.co/Y3KEZD7Dad',
762 'uploader': 'Ultima📛 | #вʟм',
763 'uploader_id': 'UltimaShadowX',
764 'uploader_url': 'https://twitter.com/UltimaShadowX',
765 'upload_date': '20221005',
766 'timestamp': 1664992565,
767 'comment_count': int,
768 'repost_count': int,
769 'like_count': int,
770 'tags': [],
771 'age_limit': 0,
772 },
773 'playlist_count': 4,
774 'params': {'skip_download': True},
775 }, {
776 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
777 'info_dict': {
778 'id': '1575559336759263233',
779 'display_id': '1575560063510810624',
780 'ext': 'mp4',
781 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
782 'thumbnail': r're:^https?://.*\.jpg',
783 'description': 'md5:95aea692fda36a12081b9629b02daa92',
784 'uploader': 'Max Olson',
785 'uploader_id': 'MesoMax919',
786 'uploader_url': 'https://twitter.com/MesoMax919',
787 'duration': 21.321,
788 'timestamp': 1664477766,
789 'upload_date': '20220929',
790 'comment_count': int,
791 'repost_count': int,
792 'like_count': int,
793 'view_count': int,
794 'tags': ['HurricaneIan'],
795 'age_limit': 0,
796 },
797 }, {
798 # Adult content, fails if not logged in (GraphQL)
799 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
800 'info_dict': {
801 'id': '1575199163847000068',
802 'display_id': '1575199173472927762',
803 'ext': 'mp4',
804 'title': str,
805 'description': str,
806 'uploader': str,
807 'uploader_id': 'Rizdraws',
808 'uploader_url': 'https://twitter.com/Rizdraws',
809 'upload_date': '20220928',
810 'timestamp': 1664391723,
811 'thumbnail': r're:^https?://.+\.jpg',
812 'like_count': int,
813 'repost_count': int,
814 'comment_count': int,
815 'age_limit': 18,
816 'tags': []
817 },
818 'skip': 'Requires authentication',
819 }, {
820 # Playlist result only with auth
821 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
822 'playlist_mincount': 2,
823 'info_dict': {
824 'id': '1395079556562706435',
825 'title': str,
826 'tags': [],
827 'uploader': str,
828 'like_count': int,
829 'upload_date': '20210519',
830 'age_limit': 0,
831 'repost_count': int,
832 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
833 'uploader_id': 'Srirachachau',
834 'comment_count': int,
835 'uploader_url': 'https://twitter.com/Srirachachau',
836 'timestamp': 1621447860,
837 },
838 }, {
839 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
840 'playlist_mincount': 2,
841 'info_dict': {
842 'id': '1578353380363501568',
843 'title': str,
844 'uploader_id': 'DavidToons_',
845 'repost_count': int,
846 'like_count': int,
847 'uploader': str,
848 'timestamp': 1665143744,
849 'uploader_url': 'https://twitter.com/DavidToons_',
850 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
851 'tags': [],
852 'comment_count': int,
853 'upload_date': '20221007',
854 'age_limit': 0,
855 },
856 }, {
857 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
858 'playlist_count': 2,
859 'info_dict': {
860 'id': '1578401165338976258',
861 'title': str,
862 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
863 'uploader': str,
864 'uploader_id': 'primevideouk',
865 'timestamp': 1665155137,
866 'upload_date': '20221007',
867 'age_limit': 0,
868 'uploader_url': 'https://twitter.com/primevideouk',
869 'comment_count': int,
870 'repost_count': int,
871 'like_count': int,
872 'tags': ['TheRingsOfPower'],
873 },
874 }, {
875 # Twitter Spaces
876 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
877 'info_dict': {
878 'id': '1lPJqmBeeNAJb',
879 'ext': 'm4a',
880 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
881 'uploader': r're:Monique Camarra.+?',
882 'uploader_id': 'MoniqueCamarra',
883 'live_status': 'was_live',
884 'release_timestamp': 1658417414,
885 'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
886 'timestamp': 1658407771,
887 'release_date': '20220721',
888 'upload_date': '20220721',
889 },
890 'add_ie': ['TwitterSpaces'],
891 'params': {'skip_download': 'm3u8'},
892 'skip': 'Requires authentication',
893 }, {
894 # URL specifies video number but --yes-playlist
895 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
896 'playlist_mincount': 2,
897 'info_dict': {
898 'id': '1600649710662213632',
899 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
900 'timestamp': 1670459604.0,
901 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
902 'comment_count': int,
903 'uploader_id': 'CTVJLaidlaw',
904 'repost_count': int,
905 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
906 'upload_date': '20221208',
907 'age_limit': 0,
908 'uploader': 'Jocelyn Laidlaw',
909 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
910 'like_count': int,
911 },
912 }, {
913 # URL specifies video number and --no-playlist
914 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
915 'info_dict': {
916 'id': '1600649511827013632',
917 'ext': 'mp4',
918 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
919 'thumbnail': r're:^https?://.+\.jpg',
920 'timestamp': 1670459604.0,
921 'uploader_id': 'CTVJLaidlaw',
922 'uploader': 'Jocelyn Laidlaw',
923 'repost_count': int,
924 'comment_count': int,
925 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
926 'duration': 102.226,
927 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
928 'display_id': '1600649710662213632',
929 'like_count': int,
930 'view_count': int,
931 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
932 'upload_date': '20221208',
933 'age_limit': 0,
934 },
935 'params': {'noplaylist': True},
936 }, {
937 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
938 # note the id different between extraction and url
939 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
940 'info_dict': {
941 'id': '1621117577354424321',
942 'display_id': '1621117700482416640',
943 'ext': 'mp4',
944 'title': '뽀 - 아 최우제 이동속도 봐',
945 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
946 'duration': 24.598,
947 'uploader': '뽀',
948 'uploader_id': 's2FAKER',
949 'uploader_url': 'https://twitter.com/s2FAKER',
950 'upload_date': '20230202',
951 'timestamp': 1675339553.0,
952 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
953 'age_limit': 18,
954 'tags': [],
955 'like_count': int,
956 'repost_count': int,
957 'comment_count': int,
958 'view_count': int,
959 },
960 }, {
961 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
962 'info_dict': {
963 'id': '1599108643743473680',
964 'display_id': '1599108751385972737',
965 'ext': 'mp4',
966 'title': '\u06ea - \U0001F48B',
967 'uploader_url': 'https://twitter.com/hlo_again',
968 'like_count': int,
969 'uploader_id': 'hlo_again',
970 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
971 'repost_count': int,
972 'duration': 9.531,
973 'comment_count': int,
974 'view_count': int,
975 'upload_date': '20221203',
976 'age_limit': 0,
977 'timestamp': 1670092210.0,
978 'tags': [],
979 'uploader': '\u06ea',
980 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
981 },
982 'params': {'noplaylist': True},
983 }, {
984 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
985 'info_dict': {
986 'id': '1600009362759733248',
987 'display_id': '1600009574919962625',
988 'ext': 'mp4',
989 'uploader_url': 'https://twitter.com/MunTheShinobi',
990 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
991 'view_count': int,
992 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
993 'age_limit': 0,
994 'uploader': 'Mün The Shinobi',
995 'repost_count': int,
996 'upload_date': '20221206',
997 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
998 'comment_count': int,
999 'like_count': int,
1000 'tags': [],
1001 'uploader_id': 'MunTheShinobi',
1002 'duration': 139.987,
1003 'timestamp': 1670306984.0,
1004 },
1005 }, {
1006 # url to retweet id w/ legacy api
1007 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1008 'info_dict': {
1009 'id': '1623274794488659969',
1010 'display_id': '1623739803874349067',
1011 'ext': 'mp4',
1012 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1013 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1014 'uploader': 'Johnny Bullets',
1015 'uploader_id': 'Johnnybull3ts',
1016 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1017 'age_limit': 0,
1018 'tags': [],
1019 'duration': 8.033,
1020 'timestamp': 1675853859.0,
1021 'upload_date': '20230208',
1022 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1023 'like_count': int,
1024 'repost_count': int,
1025 },
1026 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
1027 }, {
1028 # orig tweet w/ graphql
1029 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1030 'info_dict': {
1031 'id': '1623274794488659969',
1032 'display_id': '1623739803874349067',
1033 'ext': 'mp4',
1034 'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy',
1035 'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a',
1036 'uploader': '@selfisekai@hackerspace.pl 🐀',
1037 'uploader_id': 'liberdalau',
1038 'uploader_url': 'https://twitter.com/liberdalau',
1039 'age_limit': 0,
1040 'tags': [],
1041 'duration': 8.033,
1042 'timestamp': 1675964711.0,
1043 'upload_date': '20230209',
1044 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1045 'like_count': int,
1046 'view_count': int,
1047 'repost_count': int,
1048 'comment_count': int,
1049 },
1050 }, {
1051 # onion route
1052 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1053 'only_matching': True,
1054 }, {
1055 # Twitch Clip Embed
1056 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1057 'only_matching': True,
1058 }, {
1059 # promo_video_website card
1060 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1061 'only_matching': True,
1062 }, {
1063 # promo_video_convo card
1064 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1065 'only_matching': True,
1066 }, {
1067 # appplayer card
1068 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1069 'only_matching': True,
1070 }, {
1071 # video_direct_message card
1072 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1073 'only_matching': True,
1074 }, {
1075 # poll2choice_video card
1076 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1077 'only_matching': True,
1078 }, {
1079 # poll3choice_video card
1080 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1081 'only_matching': True,
1082 }, {
1083 # poll4choice_video card
1084 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1085 'only_matching': True,
1086 }]
1087
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into a legacy-style status dict.

    When logged in, the tweet is looked up among the conversation entries by
    its ``tweet-{twid}`` entry id; otherwise it is read from ``tweetResult``.
    Raises ExtractorError when the result carries a tombstone or is reported
    as ``TweetUnavailable``.
    """
    if self.is_logged_in:
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Hoist the nested user/card/quoted-tweet objects to the top level of the status
    status = result.get('legacy', {})
    nested = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested)

    # GraphQL gives binding values as a list of key/value objects;
    # turn them into a key -> value mapping
    binding_values = {}
    for item in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[item.get('key')] = item.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1124
def _build_graphql_query(self, media_id):
    """Return the GraphQL query payload for the tweet ``media_id``.

    Two different payloads are produced depending on ``self.is_logged_in``:
    the logged-out one is keyed by ``tweetId`` and carries ``fieldToggles``,
    the logged-in one is keyed by ``focalTweetId``.
    """
    if not self.is_logged_in:
        guest_variables = {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        }
        guest_features = {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        }
        return {
            'variables': guest_variables,
            'features': guest_features,
            'fieldToggles': {
                'withArticleRichContentState': False,
            },
        }

    variables = {
        'focalTweetId': media_id,
        'includePromotedContent': True,
        'with_rux_injections': False,
        'withBirdwatchNotes': True,
        'withCommunity': True,
        'withDownvotePerspective': False,
        'withQuickPromoteEligibilityTweetFields': True,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withSuperFollowsTweetFields': True,
        'withSuperFollowsUserFields': True,
        'withV2Timeline': True,
        'withVoice': True,
    }
    features = {
        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_graphql_timeline_navigation_enabled': False,
        'responsive_web_text_conversations_enabled': False,
        'responsive_web_uc_gql_enabled': True,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'tweetypie_unmention_optimization_enabled': True,
        'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
        'verified_phone_label_enabled': False,
        'vibe_api_enabled': True,
    }
    return {'variables': variables, 'features': features}
1189
def _real_extract(self, url):
    """Extract the media attached to (or linked from) a tweet.

    Depending on what the tweet contains and on the playlist selection this
    returns a single info dict, a playlist of entries, a url result pointing
    at the tweet's first expanded URL, or the bare metadata dict when nothing
    playable was found.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')

    # Pick the API: GraphQL TweetDetail when logged in; otherwise the legacy
    # REST endpoint if requested via extractor-args, else the guest GraphQL query
    if self.is_logged_in:
        status = self._graphql_to_legacy(
            self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)
    elif self._configuration_arg('legacy_api'):
        legacy_response = self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        })
        status = traverse_obj(legacy_response, 'retweeted_status', None)
    else:
        status = self._graphql_to_legacy(
            self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid), twid)

    description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', description)
    user = status.get('user') or {}
    uploader = user.get('name')
    uploader_id = user.get('screen_name')
    if uploader:
        title = f'{uploader} - {title}'

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the media-specific part of an info dict from one media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats, subtitles = [], {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            variant_formats, variant_subs = self._extract_variant_formats(variant, twid)
            formats.extend(variant_formats)
            subtitles = self._merge_subtitles(subtitles, variant_subs)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # One thumbnail per advertised size, plus the original-size image
            named_sizes = [
                *media.get('sizes', {}).items(),
                ('orig', media.get('original_info') or {}),
            ]
            thumbnails = [{
                'id': name,
                'url': update_url_query(media_url, {'name': name}),
                'width': int_or_none(size.get('w') or size.get('width')),
                'height': int_or_none(size.get('h') or size.get('height')),
            } for name, size in named_sizes]

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries (url results or format dicts) described by a tweet card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is typed; its payload lives under '<type>_value'
            binding = binding_values.get(k) or {}
            return try_get(binding, lambda x: x[x['type'].lower() + '_value'])

        def url_entry(target_url, ie=None):
            entry = {'_type': 'url', 'url': target_url}
            if ie is not None:
                entry['ie_key'] = ie.ie_key()
            return entry

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield url_entry(get_binding_value('player_url'))
            return
        if card_name == 'periscope_broadcast':
            yield url_entry(
                get_binding_value('url') or get_binding_value('player_url'), PeriscopeIE)
            return
        if card_name == 'broadcast':
            yield url_entry(get_binding_value('broadcast_url'), TwitterBroadcastIE)
            return
        if card_name == 'audiospace':
            yield url_entry(
                f'https://twitter.com/i/spaces/{get_binding_value("id")}', TwitterSpacesIE)
            return
        if card_name == 'summary':
            yield url_entry(get_binding_value('card_url'))
            return
        if card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            for media_entity in traverse_obj(
                    unified_card, ('media_entities', ...), expected_type=dict):
                yield extract_from_video_info(media_entity)
            return

        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        is_amplify = card_name == 'amplify'
        if is_amplify:
            vmap_url = get_binding_value('amplify_url_vmap')
        else:
            vmap_url = get_binding_value('player_stream_url')
        content_id = get_binding_value(f'{card_name if is_amplify else "player"}_content_id')
        formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

        thumbnails = []
        for suffix in ('_small', '', '_large', '_x_large', '_original'):
            image = get_binding_value('player_image' + suffix) or {}
            image_url = image.get('url')
            if image_url and '/player-placeholder' not in image_url:
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        yield {
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': int_or_none(get_binding_value('content_duration_seconds')),
        }

    # Non-photo media of the tweet itself and of the tweet it quotes
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        # Only the media item the URL's index points at is wanted
        media_position = int(selected_index) - 1
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', media_position, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        if desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        wanted_id = desired_obj.get('id')
        video_number = next(
            (number for number, candidate in enumerate(videos, 1)
             if candidate.get('id') == wanted_id), None)
        if video_number is not None:
            if video_number == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{video_number}'

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    # Media entities first, then whatever the card (if any) provides
    extracted = [extract_from_video_info(media) for media in videos]
    extracted.extend(extract_from_card_info(status.get('card')))
    entries = [{**info, **data, 'display_id': twid} for data in extracted]

    if not entries:
        # Nothing playable in the tweet itself: fall back to its first linked URL
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if expanded_url and expanded_url != url:
            return self.url_result(expanded_url, display_id=twid, **info)
        self.raise_no_formats('No video could be found in this tweet', expected=True)
        return info

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for number, entry in enumerate(entries, 1):
        entry['title'] += f' #{number}'

    return self.playlist_result(entries, **info)
1385
1386
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for amp.twimg.com video pages; formats come from the VMAP
    # document advertised in the page's 'twitter:amplify:vmap' meta tag.
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Return an info dict built from the page's Twitter card meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the 'twitter:<target>:width' / ':height' meta tags as (w, h)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing it would raise
        # IndexError before the generic "no formats" handling can run
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1442
1443
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for twitter.com/i/broadcasts/<id> pages
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's 'type' query parameter (if any) is used as the format id
        query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1480
1481
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for twitter.com/i/spaces/<id> audio rooms
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased API 'state' value -> live_status reported in the info dict
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL query payload for the Space ``space_id``."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Fetch a Space's metadata and, when it is playable, its audio formats."""
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space_data = self._call_graphql_api(
            'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states leave live_status as None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers={'Referer': 'https://twitter.com/'}, fatal=False)
            else:
                formats = []
            for fmt in formats:
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            # 'scheduled_start' may be absent; try_call turns the lookup error into None
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1618
1619
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or the internal 'tco:<id>' form) to their
    # target URL and hands that URL back for further extraction.
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host 't.co' matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and return a url result for its destination."""
        eid, short_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            # 'tco:<id>' input: rebuild the real short URL from the bare id
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the curl User-Agent is presumably what makes t.co answer
        # with a plain HTTP redirect - confirm before changing it
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url

        # Links flagged as unsafe land on a warning page that carries the real
        # target after this prefix; strip the prefix only where it leads the URL
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)