]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[extractor/twitter] Heed `--no-playlist` for multi-video tweets (#5757)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import re
3 import urllib.error
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import functools # isort: split
8 from ..compat import (
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..utils import (
14 ExtractorError,
15 dict_get,
16 float_or_none,
17 format_field,
18 int_or_none,
19 make_archive_id,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
29 )
30
31
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors: format building and API access."""

    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # Maps each bearer token to the guest token cached for it (None until one is
    # fetched). _call_api tries the tokens in this order and updates this
    # class-level dict in place.
    _TOKENS = {
        'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
        'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
    }
    # URL prefix shared by the _VALID_URL patterns of the Twitter extractors
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        `variant` is a mapping with a 'url' key and optionally 'bitrate' or
        'bit_rate'. Returns a `(formats, subtitles)` tuple; both are empty
        when the variant has no URL.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # The bitrate value is scaled down by 1000; a zero/missing value becomes None
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download the VMAP XML at `vmap_url` and build formats from it.

        Returns a `(formats, subtitles)` tuple; both are empty when
        `vmap_url` does not pass `url_or_none`.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The 'url' attribute is unquoted in place before the variant is processed
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also use the MediaFile URL unless it was already seen as a variant
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on `a_format` from a `/<W>x<H>/` segment of `video_url`, if any."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @functools.cached_property
    def is_logged_in(self):
        """True when the cookies for `_API_BASE` include an 'auth_token' entry."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Request `path` from the Twitter API and return the parsed JSON.

        Each bearer token in `_TOKENS` is tried in order. When not logged in,
        a guest token is fetched for the bearer token (and cached in
        `_TOKENS`) before the request; if the API reports a bad guest token,
        the cached token is dropped and the request is retried once.

        `query` is an optional dict of query parameters. `graphql` selects
        `_GRAPHQL_API_BASE` instead of `_API_BASE` and widens the set of HTTP
        statuses that are accepted without raising.

        Raises ExtractorError when no guest token can be obtained, when the
        response carries an 'errors' entry, or when the request itself fails.
        """
        # Avoid a shared mutable default argument
        if query is None:
            query = {}

        cookies = self._get_cookies(self._API_BASE)
        headers = {}

        csrf_cookie = cookies.get('ct0')
        if csrf_cookie:
            headers['x-csrf-token'] = csrf_cookie.value

        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        last_error = None
        for bearer_token in self._TOKENS:
            for first_attempt in (True, False):
                headers['Authorization'] = f'Bearer {bearer_token}'

                if not self.is_logged_in:
                    if not self._TOKENS[bearer_token]:
                        # No cached guest token for this bearer token yet: request one
                        headers.pop('x-guest-token', None)
                        guest_token_response = self._download_json(
                            self._API_BASE + 'guest/activate.json', video_id,
                            'Downloading guest token', data=b'', headers=headers)

                        self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
                        if not self._TOKENS[bearer_token]:
                            raise ExtractorError('Could not retrieve guest token')

                    headers['x-guest-token'] = self._TOKENS[bearer_token]

                try:
                    allowed_status = {400, 403, 404} if graphql else {403}
                    result = self._download_json(
                        (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
                        video_id, headers=headers, query=query, expected_status=allowed_status)

                except ExtractorError as e:
                    # A failure after an earlier 404 surfaces that first 404 instead
                    if last_error:
                        raise last_error

                    if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
                        raise

                    last_error = e
                    self.report_warning(
                        'Twitter API gave 404 response, retrying with deprecated auth token. '
                        'Only one media item can be extracted')
                    break  # continue outer loop with next bearer_token

                if result.get('errors'):
                    errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
                    if first_attempt and any('bad guest token' in error.lower() for error in errors):
                        self.to_screen('Guest token has expired. Refreshing guest token')
                        self._TOKENS[bearer_token] = None
                        continue

                    error_message = ', '.join(set(errors)) or 'Unknown error'
                    raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)

                return result

        # All bearer tokens were used up without a response being returned.
        # Fail explicitly rather than implicitly returning None to the caller.
        if last_error:
            raise last_error
        raise ExtractorError('Unable to query the Twitter API: no usable bearer token')

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query data for `media_id`; subclasses must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL `endpoint` for `media_id` and return the 'data' entry of the response."""
        data = self._build_graphql_query(media_id)
        # Each top-level value is sent as a compact JSON string in the query parameters
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
166
167
class TwitterCardIE(InfoExtractor):
    # Matches card and embedded-video URLs and hands the status ID over to TwitterIE
    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect the matched card/video URL to the corresponding status URL for TwitterIE."""
        tweet_id = self._match_id(url)
        status_url = 'https://twitter.com/statuses/' + tweet_id
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
292
293
294 class TwitterIE(TwitterBaseIE):
295 IE_NAME = 'twitter'
296 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?'
297
298 _TESTS = [{
299 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
300 'info_dict': {
301 'id': '643211870443208704',
302 'display_id': '643211948184596480',
303 'ext': 'mp4',
304 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
305 'thumbnail': r're:^https?://.*\.jpg',
306 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
307 'uploader': 'FREE THE NIPPLE',
308 'uploader_id': 'freethenipple',
309 'duration': 12.922,
310 'timestamp': 1442188653,
311 'upload_date': '20150913',
312 'uploader_url': 'https://twitter.com/freethenipple',
313 'comment_count': int,
314 'repost_count': int,
315 'like_count': int,
316 'tags': [],
317 'age_limit': 18,
318 },
319 }, {
320 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
321 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
322 'info_dict': {
323 'id': '657991469417025536',
324 'ext': 'mp4',
325 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
326 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
327 'thumbnail': r're:^https?://.*\.png',
328 'uploader': 'Gifs',
329 'uploader_id': 'giphz',
330 },
331 'expected_warnings': ['height', 'width'],
332 'skip': 'Account suspended',
333 }, {
334 'url': 'https://twitter.com/starwars/status/665052190608723968',
335 'info_dict': {
336 'id': '665052190608723968',
337 'display_id': '665052190608723968',
338 'ext': 'mp4',
339 'title': 'md5:e99588f17b3dd0503814ffb560e64731',
340 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
341 'uploader_id': 'starwars',
342 'uploader': r're:Star Wars.*',
343 'timestamp': 1447395772,
344 'upload_date': '20151113',
345 'uploader_url': 'https://twitter.com/starwars',
346 'comment_count': int,
347 'repost_count': int,
348 'like_count': int,
349 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
350 'age_limit': 0,
351 },
352 }, {
353 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
354 'info_dict': {
355 'id': '705235433198714880',
356 'ext': 'mp4',
357 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
358 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
359 'uploader_id': 'BTNBrentYarina',
360 'uploader': 'Brent Yarina',
361 'timestamp': 1456976204,
362 'upload_date': '20160303',
363 'uploader_url': 'https://twitter.com/BTNBrentYarina',
364 'comment_count': int,
365 'repost_count': int,
366 'like_count': int,
367 'tags': [],
368 'age_limit': 0,
369 },
370 'params': {
371 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
372 # Test case of TwitterCardIE
373 'skip_download': True,
374 },
375 'skip': 'Dead external link',
376 }, {
377 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
378 'info_dict': {
379 'id': '700207414000242688',
380 'display_id': '700207533655363584',
381 'ext': 'mp4',
382 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
383 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
384 'thumbnail': r're:^https?://.*\.jpg',
385 'uploader': 'jaydin donte geer',
386 'uploader_id': 'jaydingeer',
387 'duration': 30.0,
388 'timestamp': 1455777459,
389 'upload_date': '20160218',
390 'uploader_url': 'https://twitter.com/jaydingeer',
391 'comment_count': int,
392 'repost_count': int,
393 'like_count': int,
394 'tags': ['Damndaniel'],
395 'age_limit': 0,
396 },
397 }, {
398 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
399 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
400 'info_dict': {
401 'id': 'MIOxnrUteUd',
402 'ext': 'mp4',
403 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
404 'uploader': 'TAKUMA',
405 'uploader_id': '1004126642786242560',
406 'timestamp': 1402826626,
407 'upload_date': '20140615',
408 'thumbnail': r're:^https?://.*\.jpg',
409 'alt_title': 'Vine by TAKUMA',
410 'comment_count': int,
411 'repost_count': int,
412 'like_count': int,
413 'view_count': int,
414 },
415 'add_ie': ['Vine'],
416 }, {
417 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
418 'info_dict': {
419 'id': '717462543795523584',
420 'display_id': '719944021058060289',
421 'ext': 'mp4',
422 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
423 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
424 'uploader_id': 'CaptainAmerica',
425 'uploader': 'Captain America',
426 'duration': 3.17,
427 'timestamp': 1460483005,
428 'upload_date': '20160412',
429 'uploader_url': 'https://twitter.com/CaptainAmerica',
430 'thumbnail': r're:^https?://.*\.jpg',
431 'comment_count': int,
432 'repost_count': int,
433 'like_count': int,
434 'tags': [],
435 'age_limit': 0,
436 },
437 }, {
438 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
439 'info_dict': {
440 'id': '1zqKVVlkqLaKB',
441 'ext': 'mp4',
442 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
443 'upload_date': '20160923',
444 'uploader_id': '1PmKqpJdOJQoY',
445 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
446 'timestamp': 1474613214,
447 'thumbnail': r're:^https?://.*\.jpg',
448 },
449 'add_ie': ['Periscope'],
450 }, {
451 # has mp4 formats via mobile API
452 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
453 'info_dict': {
454 'id': '852138619213144067',
455 'ext': 'mp4',
456 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
457 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
458 'uploader': 'عالم الأخبار',
459 'uploader_id': 'news_al3alm',
460 'duration': 277.4,
461 'timestamp': 1492000653,
462 'upload_date': '20170412',
463 },
464 'skip': 'Account suspended',
465 }, {
466 'url': 'https://twitter.com/i/web/status/910031516746514432',
467 'info_dict': {
468 'id': '910030238373089285',
469 'display_id': '910031516746514432',
470 'ext': 'mp4',
471 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
472 'thumbnail': r're:^https?://.*\.jpg',
473 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
474 'uploader': 'Préfet de Guadeloupe',
475 'uploader_id': 'Prefet971',
476 'duration': 47.48,
477 'timestamp': 1505803395,
478 'upload_date': '20170919',
479 'uploader_url': 'https://twitter.com/Prefet971',
480 'comment_count': int,
481 'repost_count': int,
482 'like_count': int,
483 'tags': ['Maria'],
484 'age_limit': 0,
485 },
486 'params': {
487 'skip_download': True, # requires ffmpeg
488 },
489 }, {
490 # card via api.twitter.com/1.1/videos/tweet/config
491 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
492 'info_dict': {
493 'id': '1001551417340022785',
494 'display_id': '1001551623938805763',
495 'ext': 'mp4',
496 'title': 're:.*?Shep is on a roll today.*?',
497 'thumbnail': r're:^https?://.*\.jpg',
498 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
499 'uploader': 'Lis Power',
500 'uploader_id': 'LisPower1',
501 'duration': 111.278,
502 'timestamp': 1527623489,
503 'upload_date': '20180529',
504 'uploader_url': 'https://twitter.com/LisPower1',
505 'comment_count': int,
506 'repost_count': int,
507 'like_count': int,
508 'tags': [],
509 'age_limit': 0,
510 },
511 'params': {
512 'skip_download': True, # requires ffmpeg
513 },
514 }, {
515 'url': 'https://twitter.com/foobar/status/1087791357756956680',
516 'info_dict': {
517 'id': '1087791272830607360',
518 'display_id': '1087791357756956680',
519 'ext': 'mp4',
520 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
521 'thumbnail': r're:^https?://.*\.jpg',
522 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
523 'uploader': 'Twitter',
524 'uploader_id': 'Twitter',
525 'duration': 61.567,
526 'timestamp': 1548184644,
527 'upload_date': '20190122',
528 'uploader_url': 'https://twitter.com/Twitter',
529 'comment_count': int,
530 'repost_count': int,
531 'like_count': int,
532 'tags': [],
533 'age_limit': 0,
534 },
535 }, {
536 # not available in Periscope
537 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
538 'info_dict': {
539 'id': '1vOGwqejwoWxB',
540 'ext': 'mp4',
541 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
542 'uploader': 'Vivi',
543 'uploader_id': '1eVjYOLGkGrQL',
544 'thumbnail': r're:^https?://.*\.jpg',
545 'tags': ['EduTECH2019'],
546 'view_count': int,
547 },
548 'add_ie': ['TwitterBroadcast'],
549 }, {
550 # unified card
551 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
552 'info_dict': {
553 'id': '1349774757969989634',
554 'display_id': '1349794411333394432',
555 'ext': 'mp4',
556 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
557 'thumbnail': r're:^https?://.*\.jpg',
558 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
559 'uploader': 'Brooklyn Nets',
560 'uploader_id': 'BrooklynNets',
561 'duration': 324.484,
562 'timestamp': 1610651040,
563 'upload_date': '20210114',
564 'uploader_url': 'https://twitter.com/BrooklynNets',
565 'comment_count': int,
566 'repost_count': int,
567 'like_count': int,
568 'tags': [],
569 'age_limit': 0,
570 },
571 'params': {
572 'skip_download': True,
573 },
574 }, {
575 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
576 'info_dict': {
577 'id': '1577855447914409984',
578 'display_id': '1577855540407197696',
579 'ext': 'mp4',
580 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
581 'description': 'md5:b9c3699335447391d11753ab21c70a74',
582 'upload_date': '20221006',
583 'uploader': 'oshtru',
584 'uploader_id': 'oshtru',
585 'uploader_url': 'https://twitter.com/oshtru',
586 'thumbnail': r're:^https?://.*\.jpg',
587 'duration': 30.03,
588 'timestamp': 1665025050,
589 'comment_count': int,
590 'repost_count': int,
591 'like_count': int,
592 'tags': [],
593 'age_limit': 0,
594 },
595 'params': {'skip_download': True},
596 }, {
597 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
598 'info_dict': {
599 'id': '1577719286659006464',
600 'title': 'Ultima | #\u0432\u029f\u043c - Test',
601 'description': 'Test https://t.co/Y3KEZD7Dad',
602 'uploader': 'Ultima | #\u0432\u029f\u043c',
603 'uploader_id': 'UltimaShadowX',
604 'uploader_url': 'https://twitter.com/UltimaShadowX',
605 'upload_date': '20221005',
606 'timestamp': 1664992565,
607 'comment_count': int,
608 'repost_count': int,
609 'like_count': int,
610 'tags': [],
611 'age_limit': 0,
612 },
613 'playlist_count': 4,
614 'params': {'skip_download': True},
615 }, {
616 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
617 'info_dict': {
618 'id': '1575559336759263233',
619 'display_id': '1575560063510810624',
620 'ext': 'mp4',
621 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
622 'thumbnail': r're:^https?://.*\.jpg',
623 'description': 'md5:95aea692fda36a12081b9629b02daa92',
624 'uploader': 'Max Olson',
625 'uploader_id': 'MesoMax919',
626 'uploader_url': 'https://twitter.com/MesoMax919',
627 'duration': 21.321,
628 'timestamp': 1664477766,
629 'upload_date': '20220929',
630 'comment_count': int,
631 'repost_count': int,
632 'like_count': int,
633 'tags': ['HurricaneIan'],
634 'age_limit': 0,
635 },
636 }, {
637 # Adult content, uses old token
638 # Fails if not logged in (GraphQL)
639 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
640 'info_dict': {
641 'id': '1575199163847000068',
642 'display_id': '1575199173472927762',
643 'ext': 'mp4',
644 'title': str,
645 'description': str,
646 'uploader': str,
647 'uploader_id': 'Rizdraws',
648 'uploader_url': 'https://twitter.com/Rizdraws',
649 'upload_date': '20220928',
650 'timestamp': 1664391723,
651 'thumbnail': r're:^https?://.+\.jpg',
652 'like_count': int,
653 'repost_count': int,
654 'comment_count': int,
655 'age_limit': 18,
656 'tags': []
657 },
658 'expected_warnings': ['404'],
659 }, {
660 # Description is missing one https://t.co url (GraphQL)
661 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
662 'playlist_mincount': 2,
663 'info_dict': {
664 'id': '1395079556562706435',
665 'title': str,
666 'tags': [],
667 'uploader': str,
668 'like_count': int,
669 'upload_date': '20210519',
670 'age_limit': 0,
671 'repost_count': int,
672 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
673 'uploader_id': 'Srirachachau',
674 'comment_count': int,
675 'uploader_url': 'https://twitter.com/Srirachachau',
676 'timestamp': 1621447860,
677 },
678 }, {
679 # Description is missing one https://t.co url (GraphQL)
680 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
681 'playlist_mincount': 2,
682 'info_dict': {
683 'id': '1578353380363501568',
684 'title': str,
685 'uploader_id': 'DavidToons_',
686 'repost_count': int,
687 'like_count': int,
688 'uploader': str,
689 'timestamp': 1665143744,
690 'uploader_url': 'https://twitter.com/DavidToons_',
691 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
692 'tags': [],
693 'comment_count': int,
694 'upload_date': '20221007',
695 'age_limit': 0,
696 },
697 }, {
698 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
699 'playlist_count': 2,
700 'info_dict': {
701 'id': '1578401165338976258',
702 'title': str,
703 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
704 'uploader': str,
705 'uploader_id': 'primevideouk',
706 'timestamp': 1665155137,
707 'upload_date': '20221007',
708 'age_limit': 0,
709 'uploader_url': 'https://twitter.com/primevideouk',
710 'comment_count': int,
711 'repost_count': int,
712 'like_count': int,
713 'tags': ['TheRingsOfPower'],
714 },
715 }, {
716 # Twitter Spaces
717 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
718 'info_dict': {
719 'id': '1lPJqmBeeNAJb',
720 'ext': 'm4a',
721 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
722 'uploader': r're:Monique Camarra.+?',
723 'uploader_id': 'MoniqueCamarra',
724 'live_status': 'was_live',
725 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
726 'timestamp': 1658407771464,
727 },
728 'add_ie': ['TwitterSpaces'],
729 'params': {'skip_download': 'm3u8'},
730 }, {
731 # URL specifies video number but --yes-playlist
732 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
733 'playlist_mincount': 2,
734 'info_dict': {
735 'id': '1600649710662213632',
736 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
737 'timestamp': 1670459604.0,
738 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
739 'comment_count': int,
740 'uploader_id': 'CTVJLaidlaw',
741 'repost_count': int,
742 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
743 'upload_date': '20221208',
744 'age_limit': 0,
745 'uploader': 'Jocelyn Laidlaw',
746 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
747 'like_count': int,
748 },
749 }, {
750 # URL specifies video number and --no-playlist
751 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
752 'info_dict': {
753 'id': '1600649511827013632',
754 'ext': 'mp4',
755 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
756 'thumbnail': r're:^https?://.+\.jpg',
757 'timestamp': 1670459604.0,
758 'uploader_id': 'CTVJLaidlaw',
759 'uploader': 'Jocelyn Laidlaw',
760 'repost_count': int,
761 'comment_count': int,
762 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
763 'duration': 102.226,
764 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
765 'display_id': '1600649710662213632',
766 'like_count': int,
767 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
768 'upload_date': '20221208',
769 'age_limit': 0,
770 },
771 'params': {'noplaylist': True},
772 }, {
773 # onion route
774 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
775 'only_matching': True,
776 }, {
777 # Twitch Clip Embed
778 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
779 'only_matching': True,
780 }, {
781 # promo_video_website card
782 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
783 'only_matching': True,
784 }, {
785 # promo_video_convo card
786 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
787 'only_matching': True,
788 }, {
789 # appplayer card
790 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
791 'only_matching': True,
792 }, {
793 # video_direct_message card
794 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
795 'only_matching': True,
796 }, {
797 # poll2choice_video card
798 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
799 'only_matching': True,
800 }, {
801 # poll3choice_video card
802 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
803 'only_matching': True,
804 }, {
805 # poll4choice_video card
806 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
807 'only_matching': True,
808 }]
809
def _graphql_to_legacy(self, data, twid):
    """Reshape the GraphQL response `data` for tweet `twid` into the legacy status layout.

    Returns the tweet's 'legacy' dict with 'user', 'card' and 'quoted_status'
    entries merged in. Raises ExtractorError when the tweet result contains a
    'tombstone' entry.
    """
    entry_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
        'tweet_results', 'result'
    )
    tweet = traverse_obj(data, entry_path, expected_type=dict, default={}, get_all=False)

    if 'tombstone' in tweet:
        reason = traverse_obj(tweet, ('tombstone', 'text', 'text'), expected_type=str)
        raise ExtractorError(f'Twitter API says: {reason or "Unknown error"}', expected=True)

    # Note: when 'legacy' is present, this dict is updated in place
    status = tweet.get('legacy', {})
    nested_fields = traverse_obj(tweet, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_fields)

    # extra transformation is needed since result does not match legacy format:
    # the card's binding values are re-keyed into a single key -> value mapping
    binding_values = {}
    for entry in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict):
        binding_values[entry.get('key')] = entry.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
837
838 def _build_graphql_query(self, media_id):
839 return {
840 'variables': {
841 'focalTweetId': media_id,
842 'includePromotedContent': True,
843 'with_rux_injections': False,
844 'withBirdwatchNotes': True,
845 'withCommunity': True,
846 'withDownvotePerspective': False,
847 'withQuickPromoteEligibilityTweetFields': True,
848 'withReactionsMetadata': False,
849 'withReactionsPerspective': False,
850 'withSuperFollowsTweetFields': True,
851 'withSuperFollowsUserFields': True,
852 'withV2Timeline': True,
853 'withVoice': True,
854 },
855 'features': {
856 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
857 'interactive_text_enabled': True,
858 'responsive_web_edit_tweet_api_enabled': True,
859 'responsive_web_enhance_cards_enabled': True,
860 'responsive_web_graphql_timeline_navigation_enabled': False,
861 'responsive_web_text_conversations_enabled': False,
862 'responsive_web_uc_gql_enabled': True,
863 'standardized_nudges_misinfo': True,
864 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
865 'tweetypie_unmention_optimization_enabled': True,
866 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
867 'verified_phone_label_enabled': False,
868 'vibe_api_enabled': True,
869 },
870 }
871
def _real_extract(self, url):
    """Extract the video(s) attached to a single tweet.

    The tweet is fetched through the GraphQL API when the user is logged
    in or the ``force_graphql`` extractor argument is set, and through the
    ``statuses/show`` endpoint otherwise. Videos are collected from the
    tweet's (and quoted tweet's) non-photo media and from its card.

    Returns a single info dict, a playlist result when several videos are
    found, or a URL result pointing at the first expanded URL of the tweet
    when it carries no video of its own.

    Raises ExtractorError when no video can be found, or when the video
    number given in the URL does not exist.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self.is_logged_in or self._configuration_arg('force_graphql'):
        self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    else:
        status = self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        })

    title = description = status['full_text'].replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    tags = []
    for hashtag in (try_get(status, lambda x: x['entities']['hashtags'], list) or []):
        hashtag_text = hashtag.get('text')
        if not hashtag_text:
            continue
        tags.append(hashtag_text)

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': tags,
    }

    def extract_from_video_info(media):
        """Build the per-video part of an info dict from a media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries (or URL references) described by the tweet's card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is stored under a '<type>_value' key named after its own 'type'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the tweet it quotes
    media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
    videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
    cards = extract_from_card_info(status.get('card'))
    entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]

    if not entries:
        # No media of its own: defer to the first link in the tweet, unless
        # that link is this very URL (which would recurse forever)
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            raise ExtractorError('No video could be found in this tweet', expected=True)

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        # Video numbers in the URL are 1-based. Reject 0 explicitly: it would
        # otherwise become index -1 and silently select the last video.
        index = int(selected_index) - 1
        if not 0 <= index < len(entries):
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)

        return entries[index]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1057
1058
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for ``amp.twimg.com/v/<uuid>`` video pages."""

    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Return the info dict for the page at *url*.

        Formats come from the VMAP document referenced by the page's
        ``twitter:amplify:vmap`` meta tag; the thumbnail and the player
        dimensions are read from the other ``twitter:*`` meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the (width, height) pair advertised for 'twitter:<target>'
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # The vmap lookup is non-fatal, so there may be no format to annotate;
        # indexing an empty list here would crash with an IndexError.
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1114
1115
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` pages."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)

        # Broadcast metadata, keyed by broadcast ID in the API response
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        # Stream location for the broadcast's media key
        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        m3u8_url = source.get('noRedirectPlaybackUrl')
        if not m3u8_url:
            m3u8_url = source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL doubles as the format ID
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1152
1153
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` audio rooms."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased 'state' of a space's metadata to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for the space lookup."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Return the info dict for the space at *url*.

        No formats are extracted for spaces that have not started yet or
        that have ended but are not downloadable yet; in both cases
        ``raise_no_formats`` is invoked instead.

        Raises ExtractorError when the API returns no data for the space.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states yield None and are treated as downloadable below
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # 'created_at' is an epoch value in milliseconds (13 digits in the
            # test fixture), whereas 'timestamp' must be expressed in seconds
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1246
1247
class TwitterShortenerIE(TwitterBaseIE):
    """Resolver for ``t.co`` short links and the ``tco:<id>`` pseudo-URL form."""

    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host 't.co' matches, and the
    # ID stops at '?' or '#' so that a fragment is never taken as part of it
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to the matching extractor."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # 'tco:<id>' form: rebuild the real short link from the ID
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()

        # Links flagged as unsafe redirect to an interstitial page that carries
        # the real target in its 'unsafe_link' query parameter
        unsafe_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_prefix):
            # Strip only the leading prefix, not every occurrence of it
            new_url = new_url[len(unsafe_prefix):]
            # NOTE(review): the parameter value is presumably percent-encoded;
            # decode it only when it is not already a plain http(s) URL
            if not re.match(r'https?://', new_url):
                new_url = compat_urllib_parse_unquote(new_url)
        return self.url_result(new_url)