]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[extractor/twitter] Fix unauthenticated extraction (#7476)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
23e7cba8 2import re
49296437 3import urllib.error
23e7cba8
S
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
18ca61c5 7from ..compat import (
18ca61c5
RA
8 compat_parse_qs,
9 compat_urllib_parse_unquote,
10 compat_urllib_parse_urlparse,
11)
23e7cba8 12from ..utils import (
2edfd745 13 ExtractorError,
13b2ae29 14 dict_get,
23e7cba8 15 float_or_none,
13b2ae29 16 format_field,
cf5881fc 17 int_or_none,
13b2ae29 18 make_archive_id,
147e62fc 19 remove_end,
13b2ae29
SS
20 str_or_none,
21 strip_or_none,
f1150b9e 22 traverse_obj,
7a26ce26 23 try_call,
2edfd745 24 try_get,
18ca61c5
RA
25 unified_timestamp,
26 update_url_query,
41d1cca3 27 url_or_none,
2edfd745 28 xpath_text,
23e7cba8
S
29)
30
31
class TwitterBaseIE(InfoExtractor):
    """Common base for the Twitter extractors.

    Bundles the API endpoints and bearer token, the password login flow
    (driven through ``onboarding/task.json``), helpers for calling the legacy
    and GraphQL APIs, and helpers that turn video variants / VMAP documents
    into format lists.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Flow token returned by the most recent login API call; it is echoed back
    # in the body of the next login request (see _perform_login).
    _flow_token = None

    # Request body that starts the login flow. Serialized once at class
    # creation time, without whitespace, and sent as raw bytes.
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats (and subtitles) for a single video variant.

        @param variant   mapping with a 'url' key and optionally 'bitrate' or
                         'bit_rate'
        @param video_id  ID passed through to the HLS manifest download
        @returns         (formats, subtitles); both empty when the variant
                         has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # NOTE(review): the 1000 is presumably int_or_none's scale
            # (bit/s -> kbit/s) - confirm against utils. `or None` makes a
            # zero bitrate count as unknown.
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract every video it lists.

        @param vmap_url  URL of the VMAP document; anything rejected by
                         url_or_none yields an empty result
        @param video_id  ID used for the download
        @returns         (formats, subtitles)
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is percent-decoded in place because the attrib
            # mapping itself is handed on as the variant.
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # Only process the MediaFile URL if there is one and it was not
        # already seen as a variant. A missing URL would produce no formats
        # anyway, so the guard merely skips a pointless call.
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width' and 'height' on a_format from a '/<W>x<H>/' URL segment.

        a_format is updated in place and left untouched when the URL contains
        no such segment.
        """
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether an 'auth_token' cookie is present for the API base URL."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _set_base_headers(self):
        """Return a fresh header dict: the bearer token plus, if available, the CSRF token.

        The CSRF token is read from the 'ct0' cookie.
        """
        headers = self._AUTH.copy()
        # NOTE(review): try_call presumably returns None when the lookup
        # raises (no 'ct0' cookie) - confirm against utils.
        csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
        if csrf_token:
            headers['x-csrf-token'] = csrf_token
        return headers

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow against onboarding/task.json.

        @param note     progress message for the download
        @param headers  request headers
        @param query    optional URL query parameters (defaults to none)
        @param data     request body as bytes
        @returns        ID of the next subtask to handle
        @raises ExtractorError  if the API reports an error, the status is
                                not 'success', or no next subtask is returned

        On success the returned flow token is stored in self._flow_token.
        """
        # HTTP 400 is declared as expected so that the response body can be
        # inspected for an error message below.
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in with username and password by walking the login subtasks.

        Does nothing if an auth token cookie is already present. Otherwise a
        guest token is obtained (from the front page, falling back to
        guest/activate.json) and subtasks are answered one after another
        until the auth token cookie shows up.

        @raises ExtractorError  on an unrecognized subtask, or when the flow
                                reports success without granting the cookie
        """
        if self.is_logged_in:
            return

        webpage = self._download_webpage('https://twitter.com/', None, 'Downloading login page')
        headers = self._set_base_headers()
        guest_token = self._search_regex(
            r'\.cookie\s*=\s*["\']gt=(\d+);', webpage, 'gt', default=None) or self._download_json(
            f'{self._API_BASE}guest/activate.json', None, 'Downloading guest token',
            data=b'', headers=headers)['guest_token']
        headers.update({
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        })

        def build_login_json(*subtask_inputs):
            # Body for a follow-up request: current flow token + our answers
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Answer to a subtask that asks for a single line of text
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        # Every branch either answers the current subtask and fetches the
        # next one, or raises; the loop ends once the cookie is present.
        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'LoginSuccessSubtask':
                # The flow claims success, yet the loop condition shows that
                # the auth token cookie is still missing.
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Call the legacy (1.1) or GraphQL API and return the decoded JSON.

        @param path      endpoint path, appended to the selected API base
        @param video_id  ID used for the download
        @param query     optional URL query parameters (defaults to none)
        @param graphql   use the GraphQL base instead of the legacy one
        @raises ExtractorError  if the response carries an 'errors' entry

        Requires a logged-in session; otherwise raise_login_required() is
        invoked before any request is made.
        """
        if not self.is_logged_in:
            self.raise_login_required()

        result = self._download_json(
            (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path, video_id,
            f'Downloading {"GraphQL" if graphql else "legacy API"} JSON', headers={
                **self._set_base_headers(),
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            }, query=query or {}, expected_status={400, 401, 403, 404} if graphql else {403})

        if result.get('errors'):
            # dict.fromkeys drops duplicate messages while keeping the order
            # in which the API reported them, so the text is deterministic.
            errors = ', '.join(dict.fromkeys(
                traverse_obj(result, ('errors', ..., 'message', {str}))))
            raise ExtractorError(
                f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query mapping for media_id; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Query a GraphQL endpoint and return the 'data' member of the response.

        Each value of the mapping built by _build_graphql_query is serialized
        to compact JSON and sent as a URL query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
309
310
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card and embedded video URLs.

    Matches /i/cards/tfw/v1/<id>, /i/videos/<id> and /i/videos/tweet/<id>
    and hands the numeric ID on to TwitterIE as a status URL.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'}
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect a card/video URL to the status URL of the same numeric ID."""
        tweet_id = self._match_id(url)
        # No request is made here; extraction is delegated to TwitterIE.
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 435
03879ff0 436
18ca61c5 437class TwitterIE(TwitterBaseIE):
014e8803 438 IE_NAME = 'twitter'
b6795fd3 439 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 440
cf5881fc 441 _TESTS = [{
49296437 442 # comment_count, repost_count, view_count are only available with auth (applies to all tests)
48aae2d2 443 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 444 'info_dict': {
13b2ae29
SS
445 'id': '643211870443208704',
446 'display_id': '643211948184596480',
f57f84f6 447 'ext': 'mp4',
575036b4 448 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 449 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 450 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
451 'uploader': 'FREE THE NIPPLE',
452 'uploader_id': 'freethenipple',
3b65a6fb 453 'duration': 12.922,
18ca61c5
RA
454 'timestamp': 1442188653,
455 'upload_date': '20150913',
13b2ae29 456 'uploader_url': 'https://twitter.com/freethenipple',
13b2ae29
SS
457 'like_count': int,
458 'tags': [],
459 'age_limit': 18,
f57f84f6 460 },
cf5881fc
YCH
461 }, {
462 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
463 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
464 'info_dict': {
465 'id': '657991469417025536',
466 'ext': 'mp4',
467 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
468 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 469 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
470 'uploader': 'Gifs',
471 'uploader_id': 'giphz',
472 },
7efc1c2b 473 'expected_warnings': ['height', 'width'],
fc0a45fa 474 'skip': 'Account suspended',
b703ebee
JMF
475 }, {
476 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
477 'info_dict': {
478 'id': '665052190608723968',
13b2ae29 479 'display_id': '665052190608723968',
b703ebee 480 'ext': 'mp4',
b6795fd3 481 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 482 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 483 'uploader_id': 'starwars',
7a26ce26 484 'uploader': r're:Star Wars.*',
18ca61c5
RA
485 'timestamp': 1447395772,
486 'upload_date': '20151113',
13b2ae29 487 'uploader_url': 'https://twitter.com/starwars',
13b2ae29
SS
488 'like_count': int,
489 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
490 'age_limit': 0,
b703ebee 491 },
0ae937a7
YCH
492 }, {
493 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
494 'info_dict': {
495 'id': '705235433198714880',
496 'ext': 'mp4',
18ca61c5
RA
497 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
498 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
499 'uploader_id': 'BTNBrentYarina',
500 'uploader': 'Brent Yarina',
18ca61c5
RA
501 'timestamp': 1456976204,
502 'upload_date': '20160303',
13b2ae29
SS
503 'uploader_url': 'https://twitter.com/BTNBrentYarina',
504 'comment_count': int,
505 'repost_count': int,
506 'like_count': int,
507 'tags': [],
508 'age_limit': 0,
0ae937a7
YCH
509 },
510 'params': {
511 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
512 # Test case of TwitterCardIE
513 'skip_download': True,
514 },
352e7d98 515 'skip': 'Dead external link',
03879ff0
YCH
516 }, {
517 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 518 'info_dict': {
13b2ae29
SS
519 'id': '700207414000242688',
520 'display_id': '700207533655363584',
03879ff0 521 'ext': 'mp4',
13b2ae29 522 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 523 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 524 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
525 'uploader': 'jaydin donte geer',
526 'uploader_id': 'jaydingeer',
3b65a6fb 527 'duration': 30.0,
18ca61c5
RA
528 'timestamp': 1455777459,
529 'upload_date': '20160218',
13b2ae29 530 'uploader_url': 'https://twitter.com/jaydingeer',
13b2ae29
SS
531 'like_count': int,
532 'tags': ['Damndaniel'],
533 'age_limit': 0,
03879ff0 534 },
395fd4b0
YCH
535 }, {
536 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
537 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
538 'info_dict': {
539 'id': 'MIOxnrUteUd',
540 'ext': 'mp4',
18ca61c5
RA
541 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
542 'uploader': 'TAKUMA',
543 'uploader_id': '1004126642786242560',
3615bfe1 544 'timestamp': 1402826626,
395fd4b0 545 'upload_date': '20140615',
13b2ae29
SS
546 'thumbnail': r're:^https?://.*\.jpg',
547 'alt_title': 'Vine by TAKUMA',
548 'comment_count': int,
549 'repost_count': int,
550 'like_count': int,
551 'view_count': int,
395fd4b0
YCH
552 },
553 'add_ie': ['Vine'],
36b7d9db
YCH
554 }, {
555 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 556 'info_dict': {
13b2ae29
SS
557 'id': '717462543795523584',
558 'display_id': '719944021058060289',
36b7d9db
YCH
559 'ext': 'mp4',
560 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
561 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
562 'uploader_id': 'CaptainAmerica',
36b7d9db 563 'uploader': 'Captain America',
3b65a6fb 564 'duration': 3.17,
18ca61c5
RA
565 'timestamp': 1460483005,
566 'upload_date': '20160412',
13b2ae29
SS
567 'uploader_url': 'https://twitter.com/CaptainAmerica',
568 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
569 'like_count': int,
570 'tags': [],
571 'age_limit': 0,
36b7d9db 572 },
f0bc5a86
YCH
573 }, {
574 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
575 'info_dict': {
576 'id': '1zqKVVlkqLaKB',
577 'ext': 'mp4',
18ca61c5 578 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 579 'upload_date': '20160923',
18ca61c5
RA
580 'uploader_id': '1PmKqpJdOJQoY',
581 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 582 'timestamp': 1474613214,
13b2ae29 583 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
584 },
585 'add_ie': ['Periscope'],
2edfd745
YCH
586 }, {
587 # has mp4 formats via mobile API
588 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
589 'info_dict': {
590 'id': '852138619213144067',
591 'ext': 'mp4',
592 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 593 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
594 'uploader': 'عالم الأخبار',
595 'uploader_id': 'news_al3alm',
3b65a6fb 596 'duration': 277.4,
18ca61c5
RA
597 'timestamp': 1492000653,
598 'upload_date': '20170412',
2edfd745 599 },
00dd0cd5 600 'skip': 'Account suspended',
5c1452e8
GF
601 }, {
602 'url': 'https://twitter.com/i/web/status/910031516746514432',
603 'info_dict': {
13b2ae29
SS
604 'id': '910030238373089285',
605 'display_id': '910031516746514432',
5c1452e8
GF
606 'ext': 'mp4',
607 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
608 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 609 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
610 'uploader': 'Préfet de Guadeloupe',
611 'uploader_id': 'Prefet971',
612 'duration': 47.48,
18ca61c5
RA
613 'timestamp': 1505803395,
614 'upload_date': '20170919',
13b2ae29 615 'uploader_url': 'https://twitter.com/Prefet971',
13b2ae29
SS
616 'like_count': int,
617 'tags': ['Maria'],
618 'age_limit': 0,
5c1452e8
GF
619 },
620 'params': {
621 'skip_download': True, # requires ffmpeg
622 },
2593725a
S
623 }, {
624 # card via api.twitter.com/1.1/videos/tweet/config
625 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
626 'info_dict': {
13b2ae29
SS
627 'id': '1001551417340022785',
628 'display_id': '1001551623938805763',
2593725a
S
629 'ext': 'mp4',
630 'title': 're:.*?Shep is on a roll today.*?',
631 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 632 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
633 'uploader': 'Lis Power',
634 'uploader_id': 'LisPower1',
635 'duration': 111.278,
18ca61c5
RA
636 'timestamp': 1527623489,
637 'upload_date': '20180529',
13b2ae29 638 'uploader_url': 'https://twitter.com/LisPower1',
13b2ae29
SS
639 'like_count': int,
640 'tags': [],
641 'age_limit': 0,
2593725a
S
642 },
643 'params': {
644 'skip_download': True, # requires ffmpeg
645 },
b7ef93f0
S
646 }, {
647 'url': 'https://twitter.com/foobar/status/1087791357756956680',
648 'info_dict': {
13b2ae29
SS
649 'id': '1087791272830607360',
650 'display_id': '1087791357756956680',
b7ef93f0
S
651 'ext': 'mp4',
652 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
653 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 654 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
b7ef93f0
S
655 'uploader': 'Twitter',
656 'uploader_id': 'Twitter',
657 'duration': 61.567,
18ca61c5
RA
658 'timestamp': 1548184644,
659 'upload_date': '20190122',
13b2ae29 660 'uploader_url': 'https://twitter.com/Twitter',
13b2ae29
SS
661 'like_count': int,
662 'tags': [],
663 'age_limit': 0,
18ca61c5
RA
664 },
665 }, {
666 # not available in Periscope
667 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
668 'info_dict': {
669 'id': '1vOGwqejwoWxB',
670 'ext': 'mp4',
671 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
672 'uploader': 'Vivi',
673 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
674 'thumbnail': r're:^https?://.*\.jpg',
675 'tags': ['EduTECH2019'],
676 'view_count': int,
b7ef93f0 677 },
18ca61c5 678 'add_ie': ['TwitterBroadcast'],
49296437 679 'skip': 'Requires authentication',
30a074c2 680 }, {
681 # unified card
682 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
683 'info_dict': {
13b2ae29
SS
684 'id': '1349774757969989634',
685 'display_id': '1349794411333394432',
30a074c2 686 'ext': 'mp4',
687 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
688 'thumbnail': r're:^https?://.*\.jpg',
689 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
690 'uploader': 'Brooklyn Nets',
691 'uploader_id': 'BrooklynNets',
692 'duration': 324.484,
693 'timestamp': 1610651040,
694 'upload_date': '20210114',
13b2ae29 695 'uploader_url': 'https://twitter.com/BrooklynNets',
13b2ae29
SS
696 'like_count': int,
697 'tags': [],
698 'age_limit': 0,
30a074c2 699 },
700 'params': {
701 'skip_download': True,
702 },
13b2ae29
SS
703 }, {
704 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
705 'info_dict': {
706 'id': '1577855447914409984',
707 'display_id': '1577855540407197696',
708 'ext': 'mp4',
352e7d98 709 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
710 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 711 'upload_date': '20221006',
352e7d98 712 'uploader': 'oshtru',
13b2ae29
SS
713 'uploader_id': 'oshtru',
714 'uploader_url': 'https://twitter.com/oshtru',
715 'thumbnail': r're:^https?://.*\.jpg',
716 'duration': 30.03,
7a26ce26 717 'timestamp': 1665025050,
13b2ae29
SS
718 'like_count': int,
719 'tags': [],
720 'age_limit': 0,
721 },
722 'params': {'skip_download': True},
723 }, {
724 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
725 'info_dict': {
726 'id': '1577719286659006464',
49296437 727 'title': 'Ultima📛 | #вʟм - Test',
13b2ae29 728 'description': 'Test https://t.co/Y3KEZD7Dad',
49296437 729 'uploader': 'Ultima📛 | #вʟм',
13b2ae29
SS
730 'uploader_id': 'UltimaShadowX',
731 'uploader_url': 'https://twitter.com/UltimaShadowX',
732 'upload_date': '20221005',
7a26ce26 733 'timestamp': 1664992565,
13b2ae29
SS
734 'like_count': int,
735 'tags': [],
736 'age_limit': 0,
737 },
738 'playlist_count': 4,
739 'params': {'skip_download': True},
7a26ce26
SS
740 }, {
741 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
742 'info_dict': {
743 'id': '1575559336759263233',
744 'display_id': '1575560063510810624',
745 'ext': 'mp4',
746 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
747 'thumbnail': r're:^https?://.*\.jpg',
748 'description': 'md5:95aea692fda36a12081b9629b02daa92',
749 'uploader': 'Max Olson',
750 'uploader_id': 'MesoMax919',
751 'uploader_url': 'https://twitter.com/MesoMax919',
752 'duration': 21.321,
753 'timestamp': 1664477766,
754 'upload_date': '20220929',
7a26ce26
SS
755 'like_count': int,
756 'tags': ['HurricaneIan'],
757 'age_limit': 0,
758 },
759 }, {
147e62fc 760 # Adult content, fails if not logged in (GraphQL)
7a26ce26
SS
761 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
762 'info_dict': {
763 'id': '1575199163847000068',
764 'display_id': '1575199173472927762',
765 'ext': 'mp4',
766 'title': str,
767 'description': str,
768 'uploader': str,
769 'uploader_id': 'Rizdraws',
770 'uploader_url': 'https://twitter.com/Rizdraws',
771 'upload_date': '20220928',
772 'timestamp': 1664391723,
16bed382 773 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
774 'like_count': int,
775 'repost_count': int,
776 'comment_count': int,
777 'age_limit': 18,
778 'tags': []
779 },
147e62fc 780 'skip': 'Requires authentication',
7a26ce26 781 }, {
49296437 782 # Single Vimeo video result without auth
783 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
784 'info_dict': {
785 'id': '551578322',
786 'ext': 'mp4',
787 'title': 'Dusty & The Mayor',
788 'uploader': 'Michael Chau',
789 'uploader_id': 'user29061007',
790 'uploader_url': 'https://vimeo.com/user29061007',
791 'duration': 478,
792 'thumbnail': 'https://i.vimeocdn.com/video/1139658575-0dfdce6e9a2401fe09feb24bf0d14e6f24a53c12f447ff688ace61009ad4c1ba-d_1280',
793 },
794 }, {
795 # Playlist result only with auth
7a26ce26
SS
796 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
797 'playlist_mincount': 2,
798 'info_dict': {
799 'id': '1395079556562706435',
800 'title': str,
801 'tags': [],
802 'uploader': str,
803 'like_count': int,
804 'upload_date': '20210519',
805 'age_limit': 0,
806 'repost_count': int,
147e62fc 807 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
808 'uploader_id': 'Srirachachau',
809 'comment_count': int,
810 'uploader_url': 'https://twitter.com/Srirachachau',
811 'timestamp': 1621447860,
812 },
49296437 813 'skip': 'Requires authentication',
7a26ce26 814 }, {
7a26ce26
SS
815 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
816 'playlist_mincount': 2,
817 'info_dict': {
818 'id': '1578353380363501568',
819 'title': str,
820 'uploader_id': 'DavidToons_',
821 'repost_count': int,
822 'like_count': int,
823 'uploader': str,
824 'timestamp': 1665143744,
825 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 826 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
827 'tags': [],
828 'comment_count': int,
829 'upload_date': '20221007',
830 'age_limit': 0,
831 },
49296437 832 'skip': 'Requires authentication',
7a26ce26
SS
833 }, {
834 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
835 'playlist_count': 2,
836 'info_dict': {
837 'id': '1578401165338976258',
838 'title': str,
839 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
840 'uploader': str,
841 'uploader_id': 'primevideouk',
842 'timestamp': 1665155137,
843 'upload_date': '20221007',
844 'age_limit': 0,
845 'uploader_url': 'https://twitter.com/primevideouk',
7a26ce26
SS
846 'like_count': int,
847 'tags': ['TheRingsOfPower'],
848 },
849 }, {
850 # Twitter Spaces
851 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
852 'info_dict': {
853 'id': '1lPJqmBeeNAJb',
854 'ext': 'm4a',
855 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
856 'uploader': r're:Monique Camarra.+?',
857 'uploader_id': 'MoniqueCamarra',
858 'live_status': 'was_live',
1c16d9df 859 'release_timestamp': 1658417414,
1cffd621 860 'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
861 'timestamp': 1658407771,
862 'release_date': '20220721',
863 'upload_date': '20220721',
7a26ce26
SS
864 },
865 'add_ie': ['TwitterSpaces'],
866 'params': {'skip_download': 'm3u8'},
49296437 867 'skip': 'Requires authentication',
16bed382 868 }, {
869 # URL specifies video number but --yes-playlist
870 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
871 'playlist_mincount': 2,
872 'info_dict': {
873 'id': '1600649710662213632',
874 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
875 'timestamp': 1670459604.0,
876 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
16bed382 877 'uploader_id': 'CTVJLaidlaw',
16bed382 878 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
879 'upload_date': '20221208',
880 'age_limit': 0,
881 'uploader': 'Jocelyn Laidlaw',
882 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
883 'like_count': int,
884 },
885 }, {
886 # URL specifies video number and --no-playlist
887 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
888 'info_dict': {
889 'id': '1600649511827013632',
890 'ext': 'mp4',
147e62fc 891 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 892 'thumbnail': r're:^https?://.+\.jpg',
893 'timestamp': 1670459604.0,
894 'uploader_id': 'CTVJLaidlaw',
895 'uploader': 'Jocelyn Laidlaw',
16bed382 896 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
897 'duration': 102.226,
898 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
899 'display_id': '1600649710662213632',
900 'like_count': int,
901 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
902 'upload_date': '20221208',
903 'age_limit': 0,
904 },
905 'params': {'noplaylist': True},
7543c9c9 906 }, {
907 # the id points to a TweetWithVisibilityResults entity, which wraps the actual Tweet
908 # note that the extracted id differs from the id in the url
909 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
910 'info_dict': {
911 'id': '1621117577354424321',
912 'display_id': '1621117700482416640',
913 'ext': 'mp4',
914 'title': '뽀 - 아 최우제 이동속도 봐',
915 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
916 'duration': 24.598,
917 'uploader': '뽀',
918 'uploader_id': 's2FAKER',
919 'uploader_url': 'https://twitter.com/s2FAKER',
920 'upload_date': '20230202',
921 'timestamp': 1675339553.0,
922 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
923 'age_limit': 18,
924 'tags': [],
925 'like_count': int,
7543c9c9 926 },
b6795fd3
SS
927 }, {
928 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
929 'info_dict': {
930 'id': '1599108643743473680',
931 'display_id': '1599108751385972737',
932 'ext': 'mp4',
933 'title': '\u06ea - \U0001F48B',
934 'uploader_url': 'https://twitter.com/hlo_again',
935 'like_count': int,
936 'uploader_id': 'hlo_again',
937 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
b6795fd3 938 'duration': 9.531,
b6795fd3
SS
939 'upload_date': '20221203',
940 'age_limit': 0,
941 'timestamp': 1670092210.0,
942 'tags': [],
943 'uploader': '\u06ea',
944 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
945 },
946 'params': {'noplaylist': True},
947 }, {
b6795fd3
SS
948 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
949 'info_dict': {
950 'id': '1600009362759733248',
951 'display_id': '1600009574919962625',
952 'ext': 'mp4',
953 'uploader_url': 'https://twitter.com/MunTheShinobi',
954 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
b6795fd3
SS
955 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
956 'age_limit': 0,
147e62fc 957 'uploader': 'Mün The Shinobi',
b6795fd3 958 'upload_date': '20221206',
147e62fc 959 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b6795fd3
SS
960 'like_count': int,
961 'tags': [],
962 'uploader_id': 'MunTheShinobi',
963 'duration': 139.987,
964 'timestamp': 1670306984.0,
965 },
cf605226 966 }, {
49296437 967 # url to retweet id
cf605226 968 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
969 'info_dict': {
970 'id': '1623274794488659969',
971 'display_id': '1623739803874349067',
972 'ext': 'mp4',
973 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
49296437 974 'description': 'md5:224d62f54b0cdef8e33d4c56c41ac503',
cf605226 975 'uploader': 'Johnny Bullets',
976 'uploader_id': 'Johnnybull3ts',
977 'uploader_url': 'https://twitter.com/Johnnybull3ts',
978 'age_limit': 0,
979 'tags': [],
980 'duration': 8.033,
981 'timestamp': 1675853859.0,
982 'upload_date': '20230208',
983 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
984 'like_count': int,
cf605226 985 },
82fb2357 986 }, {
987 # onion route
988 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
989 'only_matching': True,
18ca61c5
RA
990 }, {
991 # Twitch Clip Embed
992 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
993 'only_matching': True,
10a5091e
RA
994 }, {
995 # promo_video_website card
996 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
997 'only_matching': True,
00dd0cd5 998 }, {
999 # promo_video_convo card
1000 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1001 'only_matching': True,
1002 }, {
1003 # appplayer card
1004 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1005 'only_matching': True,
30a074c2 1006 }, {
1007 # video_direct_message card
1008 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1009 'only_matching': True,
1010 }, {
1011 # poll2choice_video card
1012 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1013 'only_matching': True,
1014 }, {
1015 # poll3choice_video card
1016 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1017 'only_matching': True,
1018 }, {
1019 # poll4choice_video card
1020 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1021 'only_matching': True,
cf5881fc 1022 }]
f57f84f6 1023
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL TweetDetail response into a legacy-style status dict.

    The timeline entry whose ``entryId`` is ``tweet-<twid>`` is looked up in
    ``data``. If that entry carries a tombstone, an expected ExtractorError
    with the tombstone text is raised. Otherwise the entry's ``legacy`` dict
    is returned with ``user``, ``card`` and ``quoted_status`` merged into it.
    """
    # The trailing ('tweet', None) branch accepts a result that is either
    # nested under a 'tweet' key or is the tweet object itself
    entry_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    )
    result = traverse_obj(data, entry_path, expected_type=dict, default={}, get_all=False)

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    status = result.get('legacy', {})
    nested_fields = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_fields)

    # extra transformation is needed since result does not match legacy format:
    # the list of {'key': ..., 'value': ...} items is turned into a mapping
    binding_values = {}
    for item in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[item.get('key')] = item.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1054
1055 def _build_graphql_query(self, media_id):
1056 return {
1057 'variables': {
1058 'focalTweetId': media_id,
1059 'includePromotedContent': True,
1060 'with_rux_injections': False,
1061 'withBirdwatchNotes': True,
1062 'withCommunity': True,
1063 'withDownvotePerspective': False,
1064 'withQuickPromoteEligibilityTweetFields': True,
1065 'withReactionsMetadata': False,
1066 'withReactionsPerspective': False,
1067 'withSuperFollowsTweetFields': True,
1068 'withSuperFollowsUserFields': True,
1069 'withV2Timeline': True,
1070 'withVoice': True,
1071 },
1072 'features': {
1073 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1074 'interactive_text_enabled': True,
1075 'responsive_web_edit_tweet_api_enabled': True,
1076 'responsive_web_enhance_cards_enabled': True,
1077 'responsive_web_graphql_timeline_navigation_enabled': False,
1078 'responsive_web_text_conversations_enabled': False,
1079 'responsive_web_uc_gql_enabled': True,
1080 'standardized_nudges_misinfo': True,
1081 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1082 'tweetypie_unmention_optimization_enabled': True,
1083 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1084 'verified_phone_label_enabled': False,
1085 'vibe_api_enabled': True,
1086 },
1087 }
1088
def _real_extract(self, url):
    """Extract the video(s) attached to a tweet.

    The tweet data comes from the GraphQL API when logged in, otherwise from
    the syndication endpoint. Depending on the playlist decision and on how
    many entries are found, the result is a single video, a playlist, a
    redirect to the tweet's first expanded URL, or the bare metadata.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self.is_logged_in:
        status = self._graphql_to_legacy(
            self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid), twid)
    else:
        try:
            status = self._download_json(
                'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
                headers={'User-Agent': 'Googlebot'}, query={'id': twid})
            self.to_screen(f'Some metadata is missing without authentication. {self._login_hint()}')
        except ExtractorError as error:
            not_found = isinstance(error.cause, urllib.error.HTTPError) and error.cause.code == 404
            if not_found:
                self.raise_login_required('Requested tweet may only be available when logged in')
            raise

    description = traverse_obj(
        status, (('full_text', 'text'), {lambda text: text.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', description)
    user = status.get('user') or {}
    uploader = user.get('name')
    uploader_id = user.get('screen_name')
    if uploader:
        title = f'{uploader} - {title}'

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-video fields (id, formats, thumbnails, ...) from a media dict."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        if not media_id:
            # workaround for non-authenticated responses
            media_id = traverse_obj(media, (
                'video_info', 'variants', ..., 'url',
                {lambda variant_url: re.search(r'_video/(\d+)/', variant_url)[1]}), get_all=False)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats, subtitles = [], {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            variant_formats, variant_subtitles = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, variant_subtitles)
            formats.extend(variant_formats)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # one thumbnail per advertised size, followed by the original image
            named_sizes = [*media.get('sizes', {}).items(), ('orig', media.get('original_info') or {})]
            for name, size in named_sizes:
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })

        return {
            'id': media_id or twid,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card, if it has one."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # each binding stores its payload under '<type>_value'
            binding = binding_values.get(k) or {}
            return try_get(binding, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {'_type': 'url', 'url': get_binding_value('player_url')}
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {'_type': 'url', 'url': get_binding_value('card_url')}
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            media_entities = traverse_obj(unified_card, ('media_entities', ...), expected_type=dict)
            yield from map(extract_from_video_info, media_entities)
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            if card_name == 'amplify':
                vmap_key, id_prefix = 'amplify_url_vmap', card_name
            else:
                vmap_key, id_prefix = 'player_stream_url', 'player'
            vmap_url = get_binding_value(vmap_key)
            content_id = get_binding_value('%s_content_id' % id_prefix)
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if image_url and '/player-placeholder' not in image_url:
                    thumbnails.append({
                        'id': suffix[1:] if suffix else 'medium',
                        'url': image_url,
                        'width': int_or_none(image.get('width')),
                        'height': int_or_none(image.get('height')),
                    })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Media lists are looked up under 'mediaDetails' and under
    # 'extended_entities' of both the tweet and its quoted status
    media_path = ('mediaDetails', ((None, 'quoted_status'), 'extended_entities', 'media'))
    videos = traverse_obj(status, (media_path, lambda _, m: m['type'] != 'photo', {dict}))

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        wanted = traverse_obj(status, (media_path, int(selected_index) - 1, {dict}), get_all=False)
        if not wanted:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        if wanted.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for position, candidate in enumerate(videos, 1):
            if candidate.get('id') == wanted.get('id'):
                if position == 1:
                    info['_old_archive_ids'] = [make_archive_id(self, twid)]
                if len(videos) != 1:
                    info['title'] += f' #{position}'
                break

        return {**info, **extract_from_video_info(wanted), 'display_id': twid}

    selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1290
1291
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for videos served from amp.twimg.com/v/<uuid> pages
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats, subtitles and thumbnail from an Amplify page.

        The VMAP URL and the image/player dimensions are read from the page's
        ``twitter:*`` meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        # Keep the subtitles instead of discarding them, as the card
        # extraction elsewhere in this file does with the same helper
        formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the twitter:<target>:width/height meta tags as integers
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list: indexing it unconditionally
        # would raise IndexError instead of letting the caller see that no
        # formats were found
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1347
1348
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for twitter.com/i/broadcasts/<id> pages
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast and its stream status, then build the m3u8 formats."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # the 'type' query parameter of the playlist URL serves as the m3u8 id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1385
1386
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for twitter.com/i/spaces/<id> pages
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased metadata 'state' to the live_status reported in the result
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for a space lookup."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Extract the audio formats and metadata of a Twitter Space.

        Raises an expected ExtractorError when the API returns no space data.
        The "not started", "replay disabled" and "not downloadable yet" cases
        are reported through ``raise_no_formats``.
        """
        space_id = self._match_id(url)
        response = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)
        space_data = response['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not is_live and not metadata.get('is_space_available_for_replay'):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', live=is_live, fatal=False,
                    headers={'Referer': 'https://twitter.com/'})
            else:
                formats = []
            for fmt in formats:
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if live_status == 'post_live' and not formats:
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1489
1490
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (and the internal 'tco:<id>' form) to their target URL
    IE_NAME = 'twitter:shortener'
    # The dot in 't.co' is escaped so that only that host matches, and the id
    # stops at '?' or '#' so query strings and fragments are not part of it
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to the matching extractor.

        If the redirect lands on Twitter's unsafe-link warning page, the
        leading warning prefix is stripped so the wrapped link is returned.
        """
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # 'tco:<id>' input: rebuild the real short URL
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # drop only the leading prefix; later occurrences belong to the target URL
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)