# jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
# [extractor/twitter] Default to GraphQL, handle auth errors (#6957)
# [yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5 from .periscope import PeriscopeBaseIE, PeriscopeIE
6 from ..compat import functools # isort: split
7 from ..compat import (
8 compat_parse_qs,
9 compat_urllib_parse_unquote,
10 compat_urllib_parse_urlparse,
11 )
12 from ..utils import (
13 ExtractorError,
14 dict_get,
15 float_or_none,
16 format_field,
17 int_or_none,
18 make_archive_id,
19 remove_end,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
29 )
30
31
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors: format extraction and API access."""

    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Authorization header copied into the headers of every request made by _call_api
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Guest token; fetched lazily by _call_api when no auth cookie is present
    _guest_token = None

    def _extract_variant_formats(self, variant, video_id):
        """Return a ``(formats, subtitles)`` pair for a single media variant.

        ``variant`` is a mapping with a ``url`` key and, optionally, a
        ``bitrate``/``bit_rate`` key. A variant without a URL yields no formats.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            # HLS manifest: extraction is non-fatal so other variants can still be used
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # Bitrate is scaled down by 1000 (presumably bps -> kbps); 0 is treated as unknown
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download the VMAP XML at ``vmap_url`` and return ``(formats, subtitles)``.

        Formats are collected from every ``videoVariant`` element and, if its URL
        was not already among the variants, from the ``MediaFile`` element.
        An invalid ``vmap_url`` yields ``([], {})`` without any download.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The URL attribute is percent-encoded in the XML; store the decoded form
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            # A missing MediaFile gives video_url=None, which yields no formats here
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` if ``video_url`` has a ``/<W>x<H>/`` path segment."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @functools.cached_property
    def is_logged_in(self):
        """Whether an ``auth_token`` cookie is available for the API host."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Request ``path`` from the legacy or GraphQL API and return the parsed JSON.

        When not logged in, a guest token is obtained first and reused for later
        calls; if the API reports a bad guest token, the token is refreshed and
        the request is retried once.

        @param path      API path appended to the selected base URL
        @param video_id  ID used for progress/error reporting
        @param query     Optional mapping of URL query parameters
        @param graphql   Use the GraphQL base URL instead of the legacy one
        @raises ExtractorError  if no guest token could be retrieved or the
                                response contains an ``errors`` entry
        """
        cookies = self._get_cookies(self._API_BASE)
        headers = self._AUTH.copy()

        csrf_cookie = cookies.get('ct0')
        if csrf_cookie:
            headers['x-csrf-token'] = csrf_cookie.value

        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        # These HTTP statuses still carry a JSON body that is inspected below
        allowed_status = {400, 401, 403, 404} if graphql else {403}

        for first_attempt in (True, False):
            if not self.is_logged_in and not self._guest_token:
                # Drop any stale token before asking for a new one
                headers.pop('x-guest-token', None)
                self._guest_token = traverse_obj(self._download_json(
                    f'{self._API_BASE}guest/activate.json', video_id,
                    'Downloading guest token', data=b'', headers=headers), 'guest_token')
            if self._guest_token:
                headers['x-guest-token'] = self._guest_token
            elif not self.is_logged_in:
                raise ExtractorError('Could not retrieve guest token')

            result = self._download_json(
                (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
                video_id, headers=headers, query=query or {}, expected_status=allowed_status,
                note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

            if result.get('errors'):
                # dict.fromkeys de-duplicates while keeping first-seen order, so the
                # reported message is stable between runs (a set would not be)
                errors = ', '.join(dict.fromkeys(
                    traverse_obj(result, ('errors', ..., 'message', {str}))))
                if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
                    self.to_screen('Guest token has expired. Refreshing guest token')
                    self._guest_token = None
                    continue

                raise ExtractorError(
                    f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

            return result

    def _build_graphql_query(self, media_id):
        """Return the mapping of GraphQL query parameters for ``media_id``; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call the GraphQL ``endpoint`` for ``media_id`` and return the response's ``data`` entry."""
        data = self._build_graphql_query(media_id)
        # Each top-level value is sent as a compact JSON string in the URL query
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
144
145
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card / embedded-video URLs; delegates to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'},
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Resolve a card/video URL to its status URL and hand it over to TwitterIE."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
270
271
272 class TwitterIE(TwitterBaseIE):
273 IE_NAME = 'twitter'
274 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
275
276 _TESTS = [{
277 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
278 'info_dict': {
279 'id': '643211870443208704',
280 'display_id': '643211948184596480',
281 'ext': 'mp4',
282 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
283 'thumbnail': r're:^https?://.*\.jpg',
284 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
285 'uploader': 'FREE THE NIPPLE',
286 'uploader_id': 'freethenipple',
287 'duration': 12.922,
288 'timestamp': 1442188653,
289 'upload_date': '20150913',
290 'uploader_url': 'https://twitter.com/freethenipple',
291 'comment_count': int,
292 'repost_count': int,
293 'like_count': int,
294 'view_count': int,
295 'tags': [],
296 'age_limit': 18,
297 },
298 }, {
299 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
300 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
301 'info_dict': {
302 'id': '657991469417025536',
303 'ext': 'mp4',
304 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
305 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
306 'thumbnail': r're:^https?://.*\.png',
307 'uploader': 'Gifs',
308 'uploader_id': 'giphz',
309 },
310 'expected_warnings': ['height', 'width'],
311 'skip': 'Account suspended',
312 }, {
313 'url': 'https://twitter.com/starwars/status/665052190608723968',
314 'info_dict': {
315 'id': '665052190608723968',
316 'display_id': '665052190608723968',
317 'ext': 'mp4',
318 'title': r're:Star Wars.*A new beginning is coming December 18.*',
319 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
320 'uploader_id': 'starwars',
321 'uploader': r're:Star Wars.*',
322 'timestamp': 1447395772,
323 'upload_date': '20151113',
324 'uploader_url': 'https://twitter.com/starwars',
325 'comment_count': int,
326 'repost_count': int,
327 'like_count': int,
328 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
329 'age_limit': 0,
330 },
331 }, {
332 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
333 'info_dict': {
334 'id': '705235433198714880',
335 'ext': 'mp4',
336 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
337 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
338 'uploader_id': 'BTNBrentYarina',
339 'uploader': 'Brent Yarina',
340 'timestamp': 1456976204,
341 'upload_date': '20160303',
342 'uploader_url': 'https://twitter.com/BTNBrentYarina',
343 'comment_count': int,
344 'repost_count': int,
345 'like_count': int,
346 'tags': [],
347 'age_limit': 0,
348 },
349 'params': {
350 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
351 # Test case of TwitterCardIE
352 'skip_download': True,
353 },
354 'skip': 'Dead external link',
355 }, {
356 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
357 'info_dict': {
358 'id': '700207414000242688',
359 'display_id': '700207533655363584',
360 'ext': 'mp4',
361 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
362 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
363 'thumbnail': r're:^https?://.*\.jpg',
364 'uploader': 'jaydin donte geer',
365 'uploader_id': 'jaydingeer',
366 'duration': 30.0,
367 'timestamp': 1455777459,
368 'upload_date': '20160218',
369 'uploader_url': 'https://twitter.com/jaydingeer',
370 'comment_count': int,
371 'repost_count': int,
372 'like_count': int,
373 'view_count': int,
374 'tags': ['Damndaniel'],
375 'age_limit': 0,
376 },
377 }, {
378 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
379 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
380 'info_dict': {
381 'id': 'MIOxnrUteUd',
382 'ext': 'mp4',
383 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
384 'uploader': 'TAKUMA',
385 'uploader_id': '1004126642786242560',
386 'timestamp': 1402826626,
387 'upload_date': '20140615',
388 'thumbnail': r're:^https?://.*\.jpg',
389 'alt_title': 'Vine by TAKUMA',
390 'comment_count': int,
391 'repost_count': int,
392 'like_count': int,
393 'view_count': int,
394 },
395 'add_ie': ['Vine'],
396 }, {
397 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
398 'info_dict': {
399 'id': '717462543795523584',
400 'display_id': '719944021058060289',
401 'ext': 'mp4',
402 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
403 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
404 'uploader_id': 'CaptainAmerica',
405 'uploader': 'Captain America',
406 'duration': 3.17,
407 'timestamp': 1460483005,
408 'upload_date': '20160412',
409 'uploader_url': 'https://twitter.com/CaptainAmerica',
410 'thumbnail': r're:^https?://.*\.jpg',
411 'comment_count': int,
412 'repost_count': int,
413 'like_count': int,
414 'view_count': int,
415 'tags': [],
416 'age_limit': 0,
417 },
418 }, {
419 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
420 'info_dict': {
421 'id': '1zqKVVlkqLaKB',
422 'ext': 'mp4',
423 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
424 'upload_date': '20160923',
425 'uploader_id': '1PmKqpJdOJQoY',
426 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
427 'timestamp': 1474613214,
428 'thumbnail': r're:^https?://.*\.jpg',
429 },
430 'add_ie': ['Periscope'],
431 }, {
432 # has mp4 formats via mobile API
433 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
434 'info_dict': {
435 'id': '852138619213144067',
436 'ext': 'mp4',
437 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
438 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
439 'uploader': 'عالم الأخبار',
440 'uploader_id': 'news_al3alm',
441 'duration': 277.4,
442 'timestamp': 1492000653,
443 'upload_date': '20170412',
444 },
445 'skip': 'Account suspended',
446 }, {
447 'url': 'https://twitter.com/i/web/status/910031516746514432',
448 'info_dict': {
449 'id': '910030238373089285',
450 'display_id': '910031516746514432',
451 'ext': 'mp4',
452 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
453 'thumbnail': r're:^https?://.*\.jpg',
454 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
455 'uploader': 'Préfet de Guadeloupe',
456 'uploader_id': 'Prefet971',
457 'duration': 47.48,
458 'timestamp': 1505803395,
459 'upload_date': '20170919',
460 'uploader_url': 'https://twitter.com/Prefet971',
461 'comment_count': int,
462 'repost_count': int,
463 'like_count': int,
464 'view_count': int,
465 'tags': ['Maria'],
466 'age_limit': 0,
467 },
468 'params': {
469 'skip_download': True, # requires ffmpeg
470 },
471 }, {
472 # card via api.twitter.com/1.1/videos/tweet/config
473 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
474 'info_dict': {
475 'id': '1001551417340022785',
476 'display_id': '1001551623938805763',
477 'ext': 'mp4',
478 'title': 're:.*?Shep is on a roll today.*?',
479 'thumbnail': r're:^https?://.*\.jpg',
480 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
481 'uploader': 'Lis Power',
482 'uploader_id': 'LisPower1',
483 'duration': 111.278,
484 'timestamp': 1527623489,
485 'upload_date': '20180529',
486 'uploader_url': 'https://twitter.com/LisPower1',
487 'comment_count': int,
488 'repost_count': int,
489 'like_count': int,
490 'view_count': int,
491 'tags': [],
492 'age_limit': 0,
493 },
494 'params': {
495 'skip_download': True, # requires ffmpeg
496 },
497 }, {
498 'url': 'https://twitter.com/foobar/status/1087791357756956680',
499 'info_dict': {
500 'id': '1087791272830607360',
501 'display_id': '1087791357756956680',
502 'ext': 'mp4',
503 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
504 'thumbnail': r're:^https?://.*\.jpg',
505 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
506 'uploader': 'Twitter',
507 'uploader_id': 'Twitter',
508 'duration': 61.567,
509 'timestamp': 1548184644,
510 'upload_date': '20190122',
511 'uploader_url': 'https://twitter.com/Twitter',
512 'comment_count': int,
513 'repost_count': int,
514 'like_count': int,
515 'view_count': int,
516 'tags': [],
517 'age_limit': 0,
518 },
519 }, {
520 # not available in Periscope
521 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
522 'info_dict': {
523 'id': '1vOGwqejwoWxB',
524 'ext': 'mp4',
525 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
526 'uploader': 'Vivi',
527 'uploader_id': '1eVjYOLGkGrQL',
528 'thumbnail': r're:^https?://.*\.jpg',
529 'tags': ['EduTECH2019'],
530 'view_count': int,
531 },
532 'add_ie': ['TwitterBroadcast'],
533 }, {
534 # unified card
535 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
536 'info_dict': {
537 'id': '1349774757969989634',
538 'display_id': '1349794411333394432',
539 'ext': 'mp4',
540 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
541 'thumbnail': r're:^https?://.*\.jpg',
542 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
543 'uploader': 'Brooklyn Nets',
544 'uploader_id': 'BrooklynNets',
545 'duration': 324.484,
546 'timestamp': 1610651040,
547 'upload_date': '20210114',
548 'uploader_url': 'https://twitter.com/BrooklynNets',
549 'comment_count': int,
550 'repost_count': int,
551 'like_count': int,
552 'tags': [],
553 'age_limit': 0,
554 },
555 'params': {
556 'skip_download': True,
557 },
558 }, {
559 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
560 'info_dict': {
561 'id': '1577855447914409984',
562 'display_id': '1577855540407197696',
563 'ext': 'mp4',
564 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
565 'description': 'md5:b9c3699335447391d11753ab21c70a74',
566 'upload_date': '20221006',
567 'uploader': 'oshtru',
568 'uploader_id': 'oshtru',
569 'uploader_url': 'https://twitter.com/oshtru',
570 'thumbnail': r're:^https?://.*\.jpg',
571 'duration': 30.03,
572 'timestamp': 1665025050,
573 'comment_count': int,
574 'repost_count': int,
575 'like_count': int,
576 'view_count': int,
577 'tags': [],
578 'age_limit': 0,
579 },
580 'params': {'skip_download': True},
581 }, {
582 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
583 'info_dict': {
584 'id': '1577719286659006464',
585 'title': 'Ultima | #\u0432\u029f\u043c - Test',
586 'description': 'Test https://t.co/Y3KEZD7Dad',
587 'uploader': 'Ultima | #\u0432\u029f\u043c',
588 'uploader_id': 'UltimaShadowX',
589 'uploader_url': 'https://twitter.com/UltimaShadowX',
590 'upload_date': '20221005',
591 'timestamp': 1664992565,
592 'comment_count': int,
593 'repost_count': int,
594 'like_count': int,
595 'tags': [],
596 'age_limit': 0,
597 },
598 'playlist_count': 4,
599 'params': {'skip_download': True},
600 }, {
601 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
602 'info_dict': {
603 'id': '1575559336759263233',
604 'display_id': '1575560063510810624',
605 'ext': 'mp4',
606 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
607 'thumbnail': r're:^https?://.*\.jpg',
608 'description': 'md5:95aea692fda36a12081b9629b02daa92',
609 'uploader': 'Max Olson',
610 'uploader_id': 'MesoMax919',
611 'uploader_url': 'https://twitter.com/MesoMax919',
612 'duration': 21.321,
613 'timestamp': 1664477766,
614 'upload_date': '20220929',
615 'comment_count': int,
616 'repost_count': int,
617 'like_count': int,
618 'view_count': int,
619 'tags': ['HurricaneIan'],
620 'age_limit': 0,
621 },
622 }, {
623 # Adult content, fails if not logged in (GraphQL)
624 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
625 'info_dict': {
626 'id': '1575199163847000068',
627 'display_id': '1575199173472927762',
628 'ext': 'mp4',
629 'title': str,
630 'description': str,
631 'uploader': str,
632 'uploader_id': 'Rizdraws',
633 'uploader_url': 'https://twitter.com/Rizdraws',
634 'upload_date': '20220928',
635 'timestamp': 1664391723,
636 'thumbnail': r're:^https?://.+\.jpg',
637 'like_count': int,
638 'repost_count': int,
639 'comment_count': int,
640 'age_limit': 18,
641 'tags': []
642 },
643 'skip': 'Requires authentication',
644 }, {
645 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
646 'playlist_mincount': 2,
647 'info_dict': {
648 'id': '1395079556562706435',
649 'title': str,
650 'tags': [],
651 'uploader': str,
652 'like_count': int,
653 'upload_date': '20210519',
654 'age_limit': 0,
655 'repost_count': int,
656 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
657 'uploader_id': 'Srirachachau',
658 'comment_count': int,
659 'uploader_url': 'https://twitter.com/Srirachachau',
660 'timestamp': 1621447860,
661 },
662 }, {
663 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
664 'playlist_mincount': 2,
665 'info_dict': {
666 'id': '1578353380363501568',
667 'title': str,
668 'uploader_id': 'DavidToons_',
669 'repost_count': int,
670 'like_count': int,
671 'uploader': str,
672 'timestamp': 1665143744,
673 'uploader_url': 'https://twitter.com/DavidToons_',
674 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
675 'tags': [],
676 'comment_count': int,
677 'upload_date': '20221007',
678 'age_limit': 0,
679 },
680 }, {
681 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
682 'playlist_count': 2,
683 'info_dict': {
684 'id': '1578401165338976258',
685 'title': str,
686 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
687 'uploader': str,
688 'uploader_id': 'primevideouk',
689 'timestamp': 1665155137,
690 'upload_date': '20221007',
691 'age_limit': 0,
692 'uploader_url': 'https://twitter.com/primevideouk',
693 'comment_count': int,
694 'repost_count': int,
695 'like_count': int,
696 'tags': ['TheRingsOfPower'],
697 },
698 }, {
699 # Twitter Spaces
700 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
701 'info_dict': {
702 'id': '1lPJqmBeeNAJb',
703 'ext': 'm4a',
704 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
705 'uploader': r're:Monique Camarra.+?',
706 'uploader_id': 'MoniqueCamarra',
707 'live_status': 'was_live',
708 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
709 'timestamp': 1658407771464,
710 },
711 'add_ie': ['TwitterSpaces'],
712 'params': {'skip_download': 'm3u8'},
713 }, {
714 # URL specifies video number but --yes-playlist
715 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
716 'playlist_mincount': 2,
717 'info_dict': {
718 'id': '1600649710662213632',
719 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
720 'timestamp': 1670459604.0,
721 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
722 'comment_count': int,
723 'uploader_id': 'CTVJLaidlaw',
724 'repost_count': int,
725 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
726 'upload_date': '20221208',
727 'age_limit': 0,
728 'uploader': 'Jocelyn Laidlaw',
729 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
730 'like_count': int,
731 },
732 }, {
733 # URL specifies video number and --no-playlist
734 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
735 'info_dict': {
736 'id': '1600649511827013632',
737 'ext': 'mp4',
738 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
739 'thumbnail': r're:^https?://.+\.jpg',
740 'timestamp': 1670459604.0,
741 'uploader_id': 'CTVJLaidlaw',
742 'uploader': 'Jocelyn Laidlaw',
743 'repost_count': int,
744 'comment_count': int,
745 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
746 'duration': 102.226,
747 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
748 'display_id': '1600649710662213632',
749 'like_count': int,
750 'view_count': int,
751 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
752 'upload_date': '20221208',
753 'age_limit': 0,
754 },
755 'params': {'noplaylist': True},
756 }, {
757 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
758 # note the id different between extraction and url
759 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
760 'info_dict': {
761 'id': '1621117577354424321',
762 'display_id': '1621117700482416640',
763 'ext': 'mp4',
764 'title': '뽀 - 아 최우제 이동속도 봐',
765 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
766 'duration': 24.598,
767 'uploader': '뽀',
768 'uploader_id': 's2FAKER',
769 'uploader_url': 'https://twitter.com/s2FAKER',
770 'upload_date': '20230202',
771 'timestamp': 1675339553.0,
772 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
773 'age_limit': 18,
774 'tags': [],
775 'like_count': int,
776 'repost_count': int,
777 'comment_count': int,
778 'view_count': int,
779 },
780 }, {
781 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
782 'info_dict': {
783 'id': '1599108643743473680',
784 'display_id': '1599108751385972737',
785 'ext': 'mp4',
786 'title': '\u06ea - \U0001F48B',
787 'uploader_url': 'https://twitter.com/hlo_again',
788 'like_count': int,
789 'uploader_id': 'hlo_again',
790 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
791 'repost_count': int,
792 'duration': 9.531,
793 'comment_count': int,
794 'view_count': int,
795 'upload_date': '20221203',
796 'age_limit': 0,
797 'timestamp': 1670092210.0,
798 'tags': [],
799 'uploader': '\u06ea',
800 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
801 },
802 'params': {'noplaylist': True},
803 }, {
804 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
805 'info_dict': {
806 'id': '1600009362759733248',
807 'display_id': '1600009574919962625',
808 'ext': 'mp4',
809 'uploader_url': 'https://twitter.com/MunTheShinobi',
810 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
811 'view_count': int,
812 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
813 'age_limit': 0,
814 'uploader': 'Mün The Shinobi',
815 'repost_count': int,
816 'upload_date': '20221206',
817 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
818 'comment_count': int,
819 'like_count': int,
820 'tags': [],
821 'uploader_id': 'MunTheShinobi',
822 'duration': 139.987,
823 'timestamp': 1670306984.0,
824 },
825 }, {
826 # url to retweet id, legacy API
827 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
828 'info_dict': {
829 'id': '1623274794488659969',
830 'display_id': '1623739803874349067',
831 'ext': 'mp4',
832 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
833 'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
834 'uploader': 'Johnny Bullets',
835 'uploader_id': 'Johnnybull3ts',
836 'uploader_url': 'https://twitter.com/Johnnybull3ts',
837 'age_limit': 0,
838 'tags': [],
839 'duration': 8.033,
840 'timestamp': 1675853859.0,
841 'upload_date': '20230208',
842 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
843 'like_count': int,
844 'repost_count': int,
845 'comment_count': int,
846 },
847 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
848 }, {
849 # onion route
850 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
851 'only_matching': True,
852 }, {
853 # Twitch Clip Embed
854 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
855 'only_matching': True,
856 }, {
857 # promo_video_website card
858 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
859 'only_matching': True,
860 }, {
861 # promo_video_convo card
862 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
863 'only_matching': True,
864 }, {
865 # appplayer card
866 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
867 'only_matching': True,
868 }, {
869 # video_direct_message card
870 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
871 'only_matching': True,
872 }, {
873 # poll2choice_video card
874 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
875 'only_matching': True,
876 }, {
877 # poll3choice_video card
878 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
879 'only_matching': True,
880 }, {
881 # poll4choice_video card
882 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
883 'only_matching': True,
884 }]
885
def _graphql_to_legacy(self, data, twid):
    """Convert the GraphQL response ``data`` for tweet ``twid`` into a legacy-style status dict.

    Raises ExtractorError (or a login-required error for adult content) when
    the matched entry carries a ``tombstone`` instead of a tweet.
    """
    entry_id = f'tweet-{twid}'
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == entry_id, 'content', 'itemContent',
        # Looks up a nested 'tweet' entry as well as the result object itself
        'tweet_results', 'result', ('tweet', None),
    )
    tweet = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    typename = tweet.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet:
        tombstone_text = traverse_obj(tweet, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        if cause and 'adult content' in cause:
            self.raise_login_required(cause)
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    # Start from the 'legacy' payload and graft the related objects onto it
    status = tweet.get('legacy', {})
    related = traverse_obj(tweet, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(related)

    # extra transformation is needed since result does not match legacy format:
    # the list of {key, value} bindings becomes a single key -> value mapping
    card_bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
918
919 def _build_graphql_query(self, media_id):
920 return {
921 'variables': {
922 'focalTweetId': media_id,
923 'includePromotedContent': True,
924 'with_rux_injections': False,
925 'withBirdwatchNotes': True,
926 'withCommunity': True,
927 'withDownvotePerspective': False,
928 'withQuickPromoteEligibilityTweetFields': True,
929 'withReactionsMetadata': False,
930 'withReactionsPerspective': False,
931 'withSuperFollowsTweetFields': True,
932 'withSuperFollowsUserFields': True,
933 'withV2Timeline': True,
934 'withVoice': True,
935 },
936 'features': {
937 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
938 'interactive_text_enabled': True,
939 'responsive_web_edit_tweet_api_enabled': True,
940 'responsive_web_enhance_cards_enabled': True,
941 'responsive_web_graphql_timeline_navigation_enabled': False,
942 'responsive_web_text_conversations_enabled': False,
943 'responsive_web_uc_gql_enabled': True,
944 'standardized_nudges_misinfo': True,
945 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
946 'tweetypie_unmention_optimization_enabled': True,
947 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
948 'verified_phone_label_enabled': False,
949 'vibe_api_enabled': True,
950 },
951 }
952
def _real_extract(self, url):
    """Extract the media attached to a single tweet.

    The tweet is fetched through the legacy API when the ``legacy_api``
    extractor argument is set and the user is not logged in, and through the
    GraphQL TweetDetail endpoint otherwise.

    Returns a single info dict, a URL result pointing at the tweet's first
    expanded link, or a playlist of all entries found in the tweet.

    Raises ExtractorError when a URL-specified media number does not exist or
    does not refer to a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self._configuration_arg('legacy_api') and not self.is_logged_in:
        status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }), 'retweeted_status', None)
    else:
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    # 'full_text' can be absent (the GraphQL conversion yields an empty dict
    # when no matching tweet entry exists); fall back to an empty text rather
    # than failing with a bare KeyError before any useful error is reported
    title = description = (status.get('full_text') or '').replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        # Build the per-media fields (formats, subtitles, thumbnails, ...)
        # from one item of the tweet's media list
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        # Generator yielding the entries described by the tweet's card, if any
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the tweet it quotes
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The URL index is 1-based. An index of 0 would become -1 and silently
        # select the last media item, so treat it as unavailable instead
        media_index = int(selected_index) - 1
        desired_obj = None
        if media_index >= 0:
            desired_obj = traverse_obj(status, ('extended_entities', 'media', media_index, {dict}))
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # Nothing playable in the tweet itself: fall back to its first link
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1144
1145
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for amp.twimg.com video pages; formats come from the VMAP
    # document referenced by the page's 'twitter:amplify:vmap' meta tag.
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Return an info dict with formats and thumbnail read from the page's meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the 'twitter:<target>:width' / ':height' meta tags as ints
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # The format list is empty when the page has no usable VMAP URL;
        # guard the index access so that case does not raise IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1201
1202
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for twitter.com/i/broadcasts/<id> pages.
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)
        lookup = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = lookup['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL is used as the format id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1239
1240
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for twitter.com/i/spaces/<id> audio spaces.
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased space state from the API to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for an AudioSpaceById query."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Return the info dict for a space.

        Formats are only extracted when the space is neither upcoming nor in
        the 'post_live' state. Raises ExtractorError when the API returns no
        data for the space.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # 'created_at' is in milliseconds (13-digit value); the info dict
            # expects seconds, so scale it down
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1333
1334
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or 'tco:<code>' pseudo-URLs) to their target.
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host 't.co' matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the resolved URL back for extraction."""
        eid, short_code = self._match_valid_url(url).group('eid', 'id')
        # The 'tco:<code>' form takes precedence when it matched
        short_code = eid or short_code
        url = self._BASE_URL + short_code
        new_url = self._request_webpage(url, short_code, headers={'User-Agent': 'curl'}).geturl()

        # Strip the interstitial warning page prefix to recover the real target;
        # only the leading occurrence is removed
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)