]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[extractor/youtube] Add `ios` to default clients used
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
23e7cba8
S
2import re
3
4from .common import InfoExtractor
13b2ae29 5from .periscope import PeriscopeBaseIE, PeriscopeIE
18ca61c5 6from ..compat import (
18ca61c5
RA
7 compat_parse_qs,
8 compat_urllib_parse_unquote,
9 compat_urllib_parse_urlparse,
10)
23e7cba8 11from ..utils import (
2edfd745 12 ExtractorError,
13b2ae29 13 dict_get,
23e7cba8 14 float_or_none,
13b2ae29 15 format_field,
cf5881fc 16 int_or_none,
13b2ae29 17 make_archive_id,
147e62fc 18 remove_end,
13b2ae29
SS
19 str_or_none,
20 strip_or_none,
f1150b9e 21 traverse_obj,
7a26ce26 22 try_call,
2edfd745 23 try_get,
18ca61c5
RA
24 unified_timestamp,
25 update_url_query,
41d1cca3 26 url_or_none,
2edfd745 27 xpath_text,
23e7cba8
S
28)
29
30
class TwitterBaseIE(InfoExtractor):
    """Common base for the Twitter extractors.

    Holds the API endpoints and bearer token, the guest-token and login
    (onboarding flow) handling, and the helpers that turn API media variants
    into format dicts.
    """
    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Cached guest token; fetched on demand and reset when the API rejects it
    _guest_token = None
    # Flow token returned by the most recent login (onboarding) API response
    _flow_token = None

    # Request body sent with the first call of the login flow
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown'
                }
            }
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1
        }
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        @param variant   mapping with a 'url' key and optionally 'bitrate' or 'bit_rate'
        @param video_id  ID used for the HLS manifest download
        @returns         (formats, subtitles); both empty when the variant has no URL
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}

        if '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)

        # Bitrate is scaled down by 1000; a zero/missing bitrate becomes None
        tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
        f = {
            'url': variant_url,
            'format_id': 'http' + ('-%d' % tbr if tbr else ''),
            'tbr': tbr,
        }
        self._search_dimensions_in_video_url(f, variant_url)
        return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract formats from it.

        @param vmap_url  URL of the VMAP document; anything url_or_none() rejects yields no formats
        @param video_id  ID used for the downloads
        @returns         (formats, subtitles)
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded; store the decoded value back on the element
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also use the MediaFile URL unless one of the variants already covered it.
        # A missing MediaFile yields no formats, so it is skipped explicitly.
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width' and 'height' on a_format from a '/<width>x<height>/' URL path segment, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @property
    def is_logged_in(self):
        """Whether the cookies for the API host include an 'auth_token' cookie."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _fetch_guest_token(self, headers, display_id):
        """Request a guest token and store it in self._guest_token.

        Note: removes any 'x-guest-token' entry from the passed headers dict
        (in place) before making the request.

        @raises ExtractorError  if the response contains no guest token
        """
        headers.pop('x-guest-token', None)
        self._guest_token = traverse_obj(self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id,
            'Downloading guest token', data=b'', headers=headers), 'guest_token')
        if not self._guest_token:
            raise ExtractorError('Could not retrieve guest token')

    def _set_base_headers(self):
        """Return a fresh headers dict: the bearer token plus 'x-csrf-token' when a 'ct0' cookie exists."""
        headers = self._AUTH.copy()
        csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
        if csrf_token:
            headers['x-csrf-token'] = csrf_token
        return headers

    def _call_login_api(self, note, headers, query=None, data=None):
        """Perform one step of the login flow.

        Stores the returned flow token in self._flow_token.

        @param note     progress message for the request
        @param headers  request headers
        @param query    optional query parameters (defaults to none)
        @param data     request body
        @returns        ID of the first subtask listed in the response
        @raises ExtractorError  if the API reports an error, the status is not
                                'success', or no subtask is returned
        """
        # HTTP 400 is allowed so that the error message in the body can be reported
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query or {}, data=data, expected_status=400)
        error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if error:
            raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
        elif traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        subtask = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not subtask:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return subtask

    def _perform_login(self, username, password):
        """Log in by answering the login flow's subtasks until an auth cookie is present.

        Does nothing if already logged in.

        @raises ExtractorError  on an unrecognized subtask, or if the flow
                                reports success without an auth token cookie
        """
        if self.is_logged_in:
            return

        self._request_webpage('https://twitter.com/', None, 'Requesting cookies')
        headers = self._set_base_headers()
        self._fetch_guest_token(headers, None)
        headers.update({
            'content-type': 'application/json',
            'x-guest-token': self._guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://twitter.com/',
            'Origin': 'https://twitter.com',
        })

        def build_login_json(*subtask_inputs):
            # Reads self._flow_token at call time, i.e. the token from the previous step
            return json.dumps({
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs
            }, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            # Payload shape shared by the subtasks that take a single text input
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link'
                }
            }

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = self._call_login_api(
                    'Submitting JS instrumentation response', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'js_instrumentation': {
                            'response': '{}',
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = self._call_login_api(
                    'Submitting username', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'settings_list': {
                            'setting_responses': [{
                                'key': 'user_identifier',
                                'response_data': {
                                    'text_data': {
                                        'result': username
                                    }
                                }
                            }],
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                next_subtask = self._call_login_api(
                    'Submitting alternate identifier', headers,
                    data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
                        'one of username, phone number or email that was not used as --username'))))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = self._call_login_api(
                    'Submitting password', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'enter_password': {
                            'password': password,
                            'link': 'next_link'
                        }
                    }))

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = self._call_login_api(
                    'Submitting account duplication check', headers, data=build_login_json({
                        'subtask_id': next_subtask,
                        'check_logged_in_account': {
                            'link': 'AccountDuplicationCheck_false'
                        }
                    }))

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                next_subtask = self._call_login_api(
                    'Submitting 2FA token', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('two-factor authentication token'))))

            elif next_subtask == 'LoginAcid':
                next_subtask = self._call_login_api(
                    'Submitting confirmation code', headers, data=build_login_json(input_dict(
                        next_subtask, self._get_tfa_info('confirmation code sent to your email or phone'))))

            elif next_subtask == 'LoginSuccessSubtask':
                # Only reachable while still not logged in (loop condition),
                # i.e. the flow finished without setting the auth cookie
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or GraphQL API and return the parsed JSON.

        When not logged in, a guest token is used; if the API rejects it as a
        bad guest token, it is refreshed and the request is retried once.

        @param path      endpoint path appended to the API base URL
        @param video_id  ID used for the download
        @param query     optional query parameters (defaults to none)
        @param graphql   use the GraphQL base URL instead of the legacy one
        @raises ExtractorError  if the response contains errors
        """
        query = query or {}
        headers = self._set_base_headers()
        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        for first_attempt in (True, False):
            if not self.is_logged_in:
                if not self._guest_token:
                    self._fetch_guest_token(headers, video_id)
                headers['x-guest-token'] = self._guest_token

            allowed_status = {400, 401, 403, 404} if graphql else {403}
            result = self._download_json(
                (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
                video_id, headers=headers, query=query, expected_status=allowed_status,
                note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

            if result.get('errors'):
                # De-duplicate while keeping first-seen order, so the reported message is deterministic
                errors = ', '.join(dict.fromkeys(
                    traverse_obj(result, ('errors', ..., 'message', {str}))))
                if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
                    self.to_screen('Guest token has expired. Refreshing guest token')
                    self._guest_token = None
                    continue

                raise ExtractorError(
                    f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

            return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query data for media_id; subclasses using GraphQL must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the 'data' member of the response.

        Each value of the query data is JSON-encoded into its own query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
327
328
class TwitterCardIE(InfoExtractor):
    """Matches Twitter card / embedded-video URLs and hands them over to TwitterIE."""
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            # Card resolved through the Youtube extractor
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            # Card resolved through the Vine extractor
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate to TwitterIE via the status URL built from the numeric ID in the matched URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 453
03879ff0 454
18ca61c5 455class TwitterIE(TwitterBaseIE):
014e8803 456 IE_NAME = 'twitter'
b6795fd3 457 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 458
cf5881fc 459 _TESTS = [{
48aae2d2 460 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 461 'info_dict': {
13b2ae29
SS
462 'id': '643211870443208704',
463 'display_id': '643211948184596480',
f57f84f6 464 'ext': 'mp4',
575036b4 465 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 466 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 467 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
468 'uploader': 'FREE THE NIPPLE',
469 'uploader_id': 'freethenipple',
3b65a6fb 470 'duration': 12.922,
18ca61c5
RA
471 'timestamp': 1442188653,
472 'upload_date': '20150913',
13b2ae29
SS
473 'uploader_url': 'https://twitter.com/freethenipple',
474 'comment_count': int,
475 'repost_count': int,
476 'like_count': int,
147e62fc 477 'view_count': int,
13b2ae29
SS
478 'tags': [],
479 'age_limit': 18,
f57f84f6 480 },
cf5881fc
YCH
481 }, {
482 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
483 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
484 'info_dict': {
485 'id': '657991469417025536',
486 'ext': 'mp4',
487 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
488 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 489 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
490 'uploader': 'Gifs',
491 'uploader_id': 'giphz',
492 },
7efc1c2b 493 'expected_warnings': ['height', 'width'],
fc0a45fa 494 'skip': 'Account suspended',
b703ebee
JMF
495 }, {
496 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
497 'info_dict': {
498 'id': '665052190608723968',
13b2ae29 499 'display_id': '665052190608723968',
b703ebee 500 'ext': 'mp4',
b6795fd3 501 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 502 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 503 'uploader_id': 'starwars',
7a26ce26 504 'uploader': r're:Star Wars.*',
18ca61c5
RA
505 'timestamp': 1447395772,
506 'upload_date': '20151113',
13b2ae29
SS
507 'uploader_url': 'https://twitter.com/starwars',
508 'comment_count': int,
509 'repost_count': int,
510 'like_count': int,
511 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
512 'age_limit': 0,
b703ebee 513 },
0ae937a7
YCH
514 }, {
515 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
516 'info_dict': {
517 'id': '705235433198714880',
518 'ext': 'mp4',
18ca61c5
RA
519 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
520 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
521 'uploader_id': 'BTNBrentYarina',
522 'uploader': 'Brent Yarina',
18ca61c5
RA
523 'timestamp': 1456976204,
524 'upload_date': '20160303',
13b2ae29
SS
525 'uploader_url': 'https://twitter.com/BTNBrentYarina',
526 'comment_count': int,
527 'repost_count': int,
528 'like_count': int,
529 'tags': [],
530 'age_limit': 0,
0ae937a7
YCH
531 },
532 'params': {
533 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
534 # Test case of TwitterCardIE
535 'skip_download': True,
536 },
352e7d98 537 'skip': 'Dead external link',
03879ff0
YCH
538 }, {
539 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 540 'info_dict': {
13b2ae29
SS
541 'id': '700207414000242688',
542 'display_id': '700207533655363584',
03879ff0 543 'ext': 'mp4',
13b2ae29 544 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 545 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 546 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
547 'uploader': 'jaydin donte geer',
548 'uploader_id': 'jaydingeer',
3b65a6fb 549 'duration': 30.0,
18ca61c5
RA
550 'timestamp': 1455777459,
551 'upload_date': '20160218',
13b2ae29
SS
552 'uploader_url': 'https://twitter.com/jaydingeer',
553 'comment_count': int,
554 'repost_count': int,
555 'like_count': int,
147e62fc 556 'view_count': int,
13b2ae29
SS
557 'tags': ['Damndaniel'],
558 'age_limit': 0,
03879ff0 559 },
395fd4b0
YCH
560 }, {
561 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
562 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
563 'info_dict': {
564 'id': 'MIOxnrUteUd',
565 'ext': 'mp4',
18ca61c5
RA
566 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
567 'uploader': 'TAKUMA',
568 'uploader_id': '1004126642786242560',
3615bfe1 569 'timestamp': 1402826626,
395fd4b0 570 'upload_date': '20140615',
13b2ae29
SS
571 'thumbnail': r're:^https?://.*\.jpg',
572 'alt_title': 'Vine by TAKUMA',
573 'comment_count': int,
574 'repost_count': int,
575 'like_count': int,
576 'view_count': int,
395fd4b0
YCH
577 },
578 'add_ie': ['Vine'],
36b7d9db
YCH
579 }, {
580 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 581 'info_dict': {
13b2ae29
SS
582 'id': '717462543795523584',
583 'display_id': '719944021058060289',
36b7d9db
YCH
584 'ext': 'mp4',
585 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
586 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
587 'uploader_id': 'CaptainAmerica',
36b7d9db 588 'uploader': 'Captain America',
3b65a6fb 589 'duration': 3.17,
18ca61c5
RA
590 'timestamp': 1460483005,
591 'upload_date': '20160412',
13b2ae29
SS
592 'uploader_url': 'https://twitter.com/CaptainAmerica',
593 'thumbnail': r're:^https?://.*\.jpg',
594 'comment_count': int,
595 'repost_count': int,
596 'like_count': int,
147e62fc 597 'view_count': int,
13b2ae29
SS
598 'tags': [],
599 'age_limit': 0,
36b7d9db 600 },
f0bc5a86
YCH
601 }, {
602 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
603 'info_dict': {
604 'id': '1zqKVVlkqLaKB',
605 'ext': 'mp4',
18ca61c5 606 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 607 'upload_date': '20160923',
18ca61c5
RA
608 'uploader_id': '1PmKqpJdOJQoY',
609 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 610 'timestamp': 1474613214,
13b2ae29 611 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
612 },
613 'add_ie': ['Periscope'],
2edfd745
YCH
614 }, {
615 # has mp4 formats via mobile API
616 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
617 'info_dict': {
618 'id': '852138619213144067',
619 'ext': 'mp4',
620 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 621 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
622 'uploader': 'عالم الأخبار',
623 'uploader_id': 'news_al3alm',
3b65a6fb 624 'duration': 277.4,
18ca61c5
RA
625 'timestamp': 1492000653,
626 'upload_date': '20170412',
2edfd745 627 },
00dd0cd5 628 'skip': 'Account suspended',
5c1452e8
GF
629 }, {
630 'url': 'https://twitter.com/i/web/status/910031516746514432',
631 'info_dict': {
13b2ae29
SS
632 'id': '910030238373089285',
633 'display_id': '910031516746514432',
5c1452e8
GF
634 'ext': 'mp4',
635 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
636 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 637 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
638 'uploader': 'Préfet de Guadeloupe',
639 'uploader_id': 'Prefet971',
640 'duration': 47.48,
18ca61c5
RA
641 'timestamp': 1505803395,
642 'upload_date': '20170919',
13b2ae29
SS
643 'uploader_url': 'https://twitter.com/Prefet971',
644 'comment_count': int,
645 'repost_count': int,
646 'like_count': int,
147e62fc 647 'view_count': int,
13b2ae29
SS
648 'tags': ['Maria'],
649 'age_limit': 0,
5c1452e8
GF
650 },
651 'params': {
652 'skip_download': True, # requires ffmpeg
653 },
2593725a
S
654 }, {
655 # card via api.twitter.com/1.1/videos/tweet/config
656 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
657 'info_dict': {
13b2ae29
SS
658 'id': '1001551417340022785',
659 'display_id': '1001551623938805763',
2593725a
S
660 'ext': 'mp4',
661 'title': 're:.*?Shep is on a roll today.*?',
662 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 663 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
664 'uploader': 'Lis Power',
665 'uploader_id': 'LisPower1',
666 'duration': 111.278,
18ca61c5
RA
667 'timestamp': 1527623489,
668 'upload_date': '20180529',
13b2ae29
SS
669 'uploader_url': 'https://twitter.com/LisPower1',
670 'comment_count': int,
671 'repost_count': int,
672 'like_count': int,
147e62fc 673 'view_count': int,
13b2ae29
SS
674 'tags': [],
675 'age_limit': 0,
2593725a
S
676 },
677 'params': {
678 'skip_download': True, # requires ffmpeg
679 },
b7ef93f0
S
680 }, {
681 'url': 'https://twitter.com/foobar/status/1087791357756956680',
682 'info_dict': {
13b2ae29
SS
683 'id': '1087791272830607360',
684 'display_id': '1087791357756956680',
b7ef93f0
S
685 'ext': 'mp4',
686 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
687 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 688 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
b7ef93f0
S
689 'uploader': 'Twitter',
690 'uploader_id': 'Twitter',
691 'duration': 61.567,
18ca61c5
RA
692 'timestamp': 1548184644,
693 'upload_date': '20190122',
13b2ae29
SS
694 'uploader_url': 'https://twitter.com/Twitter',
695 'comment_count': int,
696 'repost_count': int,
697 'like_count': int,
147e62fc 698 'view_count': int,
13b2ae29
SS
699 'tags': [],
700 'age_limit': 0,
18ca61c5
RA
701 },
702 }, {
703 # not available in Periscope
704 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
705 'info_dict': {
706 'id': '1vOGwqejwoWxB',
707 'ext': 'mp4',
708 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
709 'uploader': 'Vivi',
710 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
711 'thumbnail': r're:^https?://.*\.jpg',
712 'tags': ['EduTECH2019'],
713 'view_count': int,
b7ef93f0 714 },
18ca61c5 715 'add_ie': ['TwitterBroadcast'],
30a074c2 716 }, {
717 # unified card
718 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
719 'info_dict': {
13b2ae29
SS
720 'id': '1349774757969989634',
721 'display_id': '1349794411333394432',
30a074c2 722 'ext': 'mp4',
723 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
724 'thumbnail': r're:^https?://.*\.jpg',
725 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
726 'uploader': 'Brooklyn Nets',
727 'uploader_id': 'BrooklynNets',
728 'duration': 324.484,
729 'timestamp': 1610651040,
730 'upload_date': '20210114',
13b2ae29
SS
731 'uploader_url': 'https://twitter.com/BrooklynNets',
732 'comment_count': int,
733 'repost_count': int,
734 'like_count': int,
735 'tags': [],
736 'age_limit': 0,
30a074c2 737 },
738 'params': {
739 'skip_download': True,
740 },
13b2ae29
SS
741 }, {
742 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
743 'info_dict': {
744 'id': '1577855447914409984',
745 'display_id': '1577855540407197696',
746 'ext': 'mp4',
352e7d98 747 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
748 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 749 'upload_date': '20221006',
352e7d98 750 'uploader': 'oshtru',
13b2ae29
SS
751 'uploader_id': 'oshtru',
752 'uploader_url': 'https://twitter.com/oshtru',
753 'thumbnail': r're:^https?://.*\.jpg',
754 'duration': 30.03,
7a26ce26 755 'timestamp': 1665025050,
13b2ae29
SS
756 'comment_count': int,
757 'repost_count': int,
758 'like_count': int,
147e62fc 759 'view_count': int,
13b2ae29
SS
760 'tags': [],
761 'age_limit': 0,
762 },
763 'params': {'skip_download': True},
764 }, {
765 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
766 'info_dict': {
767 'id': '1577719286659006464',
768 'title': 'Ultima | #\u0432\u029f\u043c - Test',
769 'description': 'Test https://t.co/Y3KEZD7Dad',
770 'uploader': 'Ultima | #\u0432\u029f\u043c',
771 'uploader_id': 'UltimaShadowX',
772 'uploader_url': 'https://twitter.com/UltimaShadowX',
773 'upload_date': '20221005',
7a26ce26 774 'timestamp': 1664992565,
13b2ae29
SS
775 'comment_count': int,
776 'repost_count': int,
777 'like_count': int,
778 'tags': [],
779 'age_limit': 0,
780 },
781 'playlist_count': 4,
782 'params': {'skip_download': True},
7a26ce26
SS
783 }, {
784 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
785 'info_dict': {
786 'id': '1575559336759263233',
787 'display_id': '1575560063510810624',
788 'ext': 'mp4',
789 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
790 'thumbnail': r're:^https?://.*\.jpg',
791 'description': 'md5:95aea692fda36a12081b9629b02daa92',
792 'uploader': 'Max Olson',
793 'uploader_id': 'MesoMax919',
794 'uploader_url': 'https://twitter.com/MesoMax919',
795 'duration': 21.321,
796 'timestamp': 1664477766,
797 'upload_date': '20220929',
798 'comment_count': int,
799 'repost_count': int,
800 'like_count': int,
147e62fc 801 'view_count': int,
7a26ce26
SS
802 'tags': ['HurricaneIan'],
803 'age_limit': 0,
804 },
805 }, {
147e62fc 806 # Adult content, fails if not logged in (GraphQL)
7a26ce26
SS
807 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
808 'info_dict': {
809 'id': '1575199163847000068',
810 'display_id': '1575199173472927762',
811 'ext': 'mp4',
812 'title': str,
813 'description': str,
814 'uploader': str,
815 'uploader_id': 'Rizdraws',
816 'uploader_url': 'https://twitter.com/Rizdraws',
817 'upload_date': '20220928',
818 'timestamp': 1664391723,
16bed382 819 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
820 'like_count': int,
821 'repost_count': int,
822 'comment_count': int,
823 'age_limit': 18,
824 'tags': []
825 },
147e62fc 826 'skip': 'Requires authentication',
7a26ce26 827 }, {
7a26ce26
SS
828 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
829 'playlist_mincount': 2,
830 'info_dict': {
831 'id': '1395079556562706435',
832 'title': str,
833 'tags': [],
834 'uploader': str,
835 'like_count': int,
836 'upload_date': '20210519',
837 'age_limit': 0,
838 'repost_count': int,
147e62fc 839 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
840 'uploader_id': 'Srirachachau',
841 'comment_count': int,
842 'uploader_url': 'https://twitter.com/Srirachachau',
843 'timestamp': 1621447860,
844 },
845 }, {
7a26ce26
SS
846 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
847 'playlist_mincount': 2,
848 'info_dict': {
849 'id': '1578353380363501568',
850 'title': str,
851 'uploader_id': 'DavidToons_',
852 'repost_count': int,
853 'like_count': int,
854 'uploader': str,
855 'timestamp': 1665143744,
856 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 857 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
858 'tags': [],
859 'comment_count': int,
860 'upload_date': '20221007',
861 'age_limit': 0,
862 },
863 }, {
864 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
865 'playlist_count': 2,
866 'info_dict': {
867 'id': '1578401165338976258',
868 'title': str,
869 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
870 'uploader': str,
871 'uploader_id': 'primevideouk',
872 'timestamp': 1665155137,
873 'upload_date': '20221007',
874 'age_limit': 0,
875 'uploader_url': 'https://twitter.com/primevideouk',
876 'comment_count': int,
877 'repost_count': int,
878 'like_count': int,
879 'tags': ['TheRingsOfPower'],
880 },
881 }, {
882 # Twitter Spaces
883 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
884 'info_dict': {
885 'id': '1lPJqmBeeNAJb',
886 'ext': 'm4a',
887 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
888 'uploader': r're:Monique Camarra.+?',
889 'uploader_id': 'MoniqueCamarra',
890 'live_status': 'was_live',
1c16d9df 891 'release_timestamp': 1658417414,
7a26ce26
SS
892 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
893 'timestamp': 1658407771464,
894 },
895 'add_ie': ['TwitterSpaces'],
896 'params': {'skip_download': 'm3u8'},
16bed382 897 }, {
898 # URL specifies video number but --yes-playlist
899 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
900 'playlist_mincount': 2,
901 'info_dict': {
902 'id': '1600649710662213632',
903 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
904 'timestamp': 1670459604.0,
905 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
906 'comment_count': int,
907 'uploader_id': 'CTVJLaidlaw',
908 'repost_count': int,
909 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
910 'upload_date': '20221208',
911 'age_limit': 0,
912 'uploader': 'Jocelyn Laidlaw',
913 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
914 'like_count': int,
915 },
916 }, {
917 # URL specifies video number and --no-playlist
918 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
919 'info_dict': {
920 'id': '1600649511827013632',
921 'ext': 'mp4',
147e62fc 922 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 923 'thumbnail': r're:^https?://.+\.jpg',
924 'timestamp': 1670459604.0,
925 'uploader_id': 'CTVJLaidlaw',
926 'uploader': 'Jocelyn Laidlaw',
927 'repost_count': int,
928 'comment_count': int,
929 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
930 'duration': 102.226,
931 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
932 'display_id': '1600649710662213632',
933 'like_count': int,
147e62fc 934 'view_count': int,
16bed382 935 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
936 'upload_date': '20221208',
937 'age_limit': 0,
938 },
939 'params': {'noplaylist': True},
7543c9c9 940 }, {
941 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
942 # note the id different between extraction and url
943 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
944 'info_dict': {
945 'id': '1621117577354424321',
946 'display_id': '1621117700482416640',
947 'ext': 'mp4',
948 'title': '뽀 - 아 최우제 이동속도 봐',
949 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
950 'duration': 24.598,
951 'uploader': '뽀',
952 'uploader_id': 's2FAKER',
953 'uploader_url': 'https://twitter.com/s2FAKER',
954 'upload_date': '20230202',
955 'timestamp': 1675339553.0,
956 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
957 'age_limit': 18,
958 'tags': [],
959 'like_count': int,
960 'repost_count': int,
961 'comment_count': int,
147e62fc 962 'view_count': int,
7543c9c9 963 },
b6795fd3
SS
964 }, {
965 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
966 'info_dict': {
967 'id': '1599108643743473680',
968 'display_id': '1599108751385972737',
969 'ext': 'mp4',
970 'title': '\u06ea - \U0001F48B',
971 'uploader_url': 'https://twitter.com/hlo_again',
972 'like_count': int,
973 'uploader_id': 'hlo_again',
974 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
975 'repost_count': int,
976 'duration': 9.531,
977 'comment_count': int,
147e62fc 978 'view_count': int,
b6795fd3
SS
979 'upload_date': '20221203',
980 'age_limit': 0,
981 'timestamp': 1670092210.0,
982 'tags': [],
983 'uploader': '\u06ea',
984 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
985 },
986 'params': {'noplaylist': True},
987 }, {
b6795fd3
SS
988 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
989 'info_dict': {
990 'id': '1600009362759733248',
991 'display_id': '1600009574919962625',
992 'ext': 'mp4',
993 'uploader_url': 'https://twitter.com/MunTheShinobi',
994 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
995 'view_count': int,
996 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
997 'age_limit': 0,
147e62fc 998 'uploader': 'Mün The Shinobi',
b6795fd3
SS
999 'repost_count': int,
1000 'upload_date': '20221206',
147e62fc 1001 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b6795fd3
SS
1002 'comment_count': int,
1003 'like_count': int,
1004 'tags': [],
1005 'uploader_id': 'MunTheShinobi',
1006 'duration': 139.987,
1007 'timestamp': 1670306984.0,
1008 },
cf605226 1009 }, {
147e62fc 1010 # url to retweet id, legacy API
cf605226 1011 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1012 'info_dict': {
1013 'id': '1623274794488659969',
1014 'display_id': '1623739803874349067',
1015 'ext': 'mp4',
1016 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1017 'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
1018 'uploader': 'Johnny Bullets',
1019 'uploader_id': 'Johnnybull3ts',
1020 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1021 'age_limit': 0,
1022 'tags': [],
1023 'duration': 8.033,
1024 'timestamp': 1675853859.0,
1025 'upload_date': '20230208',
1026 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1027 'like_count': int,
1028 'repost_count': int,
1029 'comment_count': int,
1030 },
147e62fc 1031 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
82fb2357 1032 }, {
1033 # onion route
1034 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1035 'only_matching': True,
18ca61c5
RA
1036 }, {
1037 # Twitch Clip Embed
1038 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1039 'only_matching': True,
10a5091e
RA
1040 }, {
1041 # promo_video_website card
1042 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1043 'only_matching': True,
00dd0cd5 1044 }, {
1045 # promo_video_convo card
1046 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1047 'only_matching': True,
1048 }, {
1049 # appplayer card
1050 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1051 'only_matching': True,
30a074c2 1052 }, {
1053 # video_direct_message card
1054 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1055 'only_matching': True,
1056 }, {
1057 # poll2choice_video card
1058 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1059 'only_matching': True,
1060 }, {
1061 # poll3choice_video card
1062 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1063 'only_matching': True,
1064 }, {
1065 # poll4choice_video card
1066 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1067 'only_matching': True,
cf5881fc 1068 }]
f57f84f6 1069
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL TweetDetail response into a legacy-style status dict.

    Looks up the timeline entry whose ``entryId`` is ``tweet-<twid>``, takes its
    ``legacy`` payload and attaches the ``user``, ``card`` and ``quoted_status``
    sub-objects found elsewhere in the result.

    Raises ExtractorError when the entry carries a ``tombstone``; if the
    tombstone text mentions adult content, ``raise_login_required`` is
    called first.
    """
    wanted_entry_id = f'tweet-{twid}'
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == wanted_entry_id, 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    )
    tweet = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    typename = tweet.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet:
        tombstone_text = traverse_obj(tweet, ('tombstone', 'text', 'text', {str}))
        reason = remove_end(tombstone_text, '. Learn more')
        if reason and 'adult content' in reason:
            self.raise_login_required(reason)
        raise ExtractorError(f'Twitter API says: {reason or "Unknown error"}', expected=True)

    legacy = tweet.get('legacy', {})
    nested_objects = traverse_obj(tweet, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    legacy.update(nested_objects)

    # The card's binding values are a sequence of key/value records here;
    # the rest of the extractor looks them up by key, so fold them into a mapping
    card_bindings = {}
    for binding in traverse_obj(legacy, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        legacy['card']['binding_values'] = card_bindings

    return legacy
1102
1103 def _build_graphql_query(self, media_id):
1104 return {
1105 'variables': {
1106 'focalTweetId': media_id,
1107 'includePromotedContent': True,
1108 'with_rux_injections': False,
1109 'withBirdwatchNotes': True,
1110 'withCommunity': True,
1111 'withDownvotePerspective': False,
1112 'withQuickPromoteEligibilityTweetFields': True,
1113 'withReactionsMetadata': False,
1114 'withReactionsPerspective': False,
1115 'withSuperFollowsTweetFields': True,
1116 'withSuperFollowsUserFields': True,
1117 'withV2Timeline': True,
1118 'withVoice': True,
1119 },
1120 'features': {
1121 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1122 'interactive_text_enabled': True,
1123 'responsive_web_edit_tweet_api_enabled': True,
1124 'responsive_web_enhance_cards_enabled': True,
1125 'responsive_web_graphql_timeline_navigation_enabled': False,
1126 'responsive_web_text_conversations_enabled': False,
1127 'responsive_web_uc_gql_enabled': True,
1128 'standardized_nudges_misinfo': True,
1129 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1130 'tweetypie_unmention_optimization_enabled': True,
1131 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1132 'verified_phone_label_enabled': False,
1133 'vibe_api_enabled': True,
1134 },
1135 }
1136
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet at ``url``.

    The tweet is fetched through the legacy API when the ``legacy_api``
    extractor argument is set and no login is active, otherwise through
    GraphQL. Depending on what is found this returns:

    * a single info dict (one video, or a URL-selected video with
      ``noplaylist``),
    * a playlist result when several entries are found,
    * a ``url_result`` for the first expanded link when the tweet has no
      media of its own,
    * the bare ``info`` dict after ``raise_no_formats`` when nothing is found.

    Raises ExtractorError when a URL-selected media index is unavailable or
    does not point to a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self._configuration_arg('legacy_api') and not self.is_logged_in:
        status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }), 'retweeted_status', None)
    else:
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    # The tweet text can be absent (e.g. when no matching entry was found);
    # fall back to '' so the "no video" handling below is reached instead of a KeyError
    title = description = (status.get('full_text') or '').replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-media fields (formats, subtitles, thumbnails, ...) for one media object."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card, if there is one."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of a quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        media_index = int(selected_index) - 1
        # URL media numbers are 1-based; '/video/0' would otherwise be looked up as index -1
        desired_obj = None if media_index < 0 else traverse_obj(
            status, ('extended_entities', 'media', media_index, {dict}))
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1328
1329
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for ``amp.twimg.com/v/<uuid>`` pages."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Read the VMAP URL and image/player dimensions from the page's meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Returns (width, height) from the 'twitter:<target>:width/height' meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Only annotate a format if the VMAP actually produced one;
        # indexing an empty list would raise IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1385
1386
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` URLs."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Fetch the broadcast record, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        playlist_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in playlist_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL is used as the format id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(playlist_url).query)
        format_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            playlist_url, broadcast_id, format_id, state, width, height)
        return info
86b868c6
U
1423
1424
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` URLs."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased space state to the live_status value reported in the result
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for looking up ``space_id``."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Look up the space and return its metadata and audio formats.

        Formats are only requested when the space is neither upcoming nor in
        the ``post_live`` state; in those two cases ``raise_no_formats`` is
        called and the metadata is still returned.

        Raises ExtractorError when the API returns no ``audioSpace`` data.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states yield live_status=None
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            # 'created_at' appears to be in milliseconds (13-digit values), like
            # 'scheduled_start' above; scale it down to seconds
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1519
1520
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Extractor that resolves ``t.co`` short links (or ``tco:<code>``) to their target URL."""
    IE_NAME = 'twitter:shortener'
    # The dot in the host name is escaped so it only matches a literal 't.co'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to the matching extractor."""
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # 'tco:<code>' form: rebuild the real short URL
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).geturl()

        # If the redirect landed on the unsafe-link warning page, keep only the
        # part after the prefix (strip the leading occurrence, not every occurrence)
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)