]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[extractor/youtube] Handle `consent.youtube`
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import re
3 import urllib.error
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import functools # isort: split
8 from ..compat import (
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..utils import (
14 ExtractorError,
15 dict_get,
16 float_or_none,
17 format_field,
18 int_or_none,
19 make_archive_id,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
29 )
30
31
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors.

    Provides format extraction from media "variant" dicts and VMAP documents,
    plus `_call_api` / `_call_graphql_api` for talking to the Twitter API.
    """

    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # Bearer token -> guest token obtained with it. A value of None means no
    # guest token has been fetched yet (or it was invalidated by _call_api).
    # _call_api tries the bearer tokens in this order.
    _TOKENS = {
        'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
        'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
    }
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'

    def _extract_variant_formats(self, variant, video_id):
        """Turn one media variant into a `(formats, subtitles)` pair.

        @param variant   mapping with a 'url' key and optionally
                         'bitrate' / 'bit_rate'
        @param video_id  id passed through to the HLS downloader helper
        @returns         ([], {}) when the variant has no URL; the result of
                         `_extract_m3u8_formats_and_subtitles` for URLs
                         containing '.m3u8'; otherwise a single direct HTTP
                         format and an empty subtitles dict
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # `or None` folds a zero bitrate into "unknown" so that it is left
            # out of the format_id as well
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP XML document and extract its formats.

        Every `videoVariant` element is processed, followed by the
        `MediaFile` URL unless it was already seen among the variants.

        @returns `(formats, subtitles)`; ([], {}) if `vmap_url` does not
                 pass `url_or_none`
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is unquoted in place because the whole attrib
            # mapping is handed on to _extract_variant_formats
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on `a_format` from a `/<W>x<H>/` URL segment.

        `a_format` is updated in place and left untouched when the URL has no
        such segment.
        """
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @functools.cached_property
    def is_logged_in(self):
        """Whether the cookies for `_API_BASE` include an 'auth_token' cookie."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the Twitter API and return the decoded JSON response.

        Each bearer token in `_TOKENS` is tried in order. When not logged in,
        a guest token is fetched (and cached in `_TOKENS`) before the request.
        A response whose errors mention 'bad guest token' causes one refresh
        of the guest token and one retry with the same bearer token.

        @param path      endpoint path appended to the API base URL
        @param video_id  id used for the download helpers' messages
        @param query     optional mapping of URL query parameters
        @param graphql   use `_GRAPHQL_API_BASE` instead of `_API_BASE`
        @raises ExtractorError  if no guest token could be retrieved, if the
                                request fails, or if the response contains an
                                'errors' entry that is not recoverable
        """
        # None sentinel instead of a `{}` default: a mutable default would be
        # one dict object shared by every call of this method
        if query is None:
            query = {}

        cookies = self._get_cookies(self._API_BASE)
        headers = {}

        csrf_cookie = cookies.get('ct0')
        if csrf_cookie:
            headers['x-csrf-token'] = csrf_cookie.value

        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        # Neither value changes between attempts
        api_url = (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path
        allowed_status = {400, 403, 404} if graphql else {403}

        last_error = None
        for bearer_token in self._TOKENS:
            for first_attempt in (True, False):
                headers['Authorization'] = f'Bearer {bearer_token}'

                if not self.is_logged_in:
                    if not self._TOKENS[bearer_token]:
                        # A guest token belonging to another bearer token (or
                        # an expired one) must not be sent while activating
                        headers.pop('x-guest-token', None)
                        guest_token_response = self._download_json(
                            self._API_BASE + 'guest/activate.json', video_id,
                            'Downloading guest token', data=b'', headers=headers)

                        self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
                        if not self._TOKENS[bearer_token]:
                            raise ExtractorError('Could not retrieve guest token')

                    headers['x-guest-token'] = self._TOKENS[bearer_token]

                try:
                    result = self._download_json(
                        api_url, video_id, headers=headers, query=query,
                        expected_status=allowed_status)

                except ExtractorError as e:
                    # A previous bearer token already failed with 404, so
                    # report that original failure
                    if last_error:
                        raise last_error

                    if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
                        raise

                    last_error = e
                    self.report_warning(
                        'Twitter API gave 404 response, retrying with deprecated auth token. '
                        'Only one media item can be extracted')
                    break  # continue outer loop with next bearer_token

                if result.get('errors'):
                    errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
                    if first_attempt and any('bad guest token' in error.lower() for error in errors):
                        self.to_screen('Guest token has expired. Refreshing guest token')
                        # Clearing the cached value makes the second attempt
                        # fetch a new guest token
                        self._TOKENS[bearer_token] = None
                        continue

                    error_message = ', '.join(set(errors)) or 'Unknown error'
                    raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)

                return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL request parameters for `media_id`.

        Subclasses that use `_call_graphql_api` must override this; the
        returned mapping's values are JSON-encoded by `_call_graphql_api`.
        """
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the 'data' part of the response.

        The query is taken from `_build_graphql_query(media_id)`; each value
        is serialised as compact JSON before being sent as a URL parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
166
167
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card / embedded-video URLs.

    It does no extraction of its own: the numeric id from the URL is
    rewritten into a `statuses/<id>` URL and handed to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Delegate the matched id to TwitterIE via a `statuses/<id>` URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
292
293
294 class TwitterIE(TwitterBaseIE):
295 IE_NAME = 'twitter'
296 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/video/(?P<index>\d+))?'
297
298 _TESTS = [{
299 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
300 'info_dict': {
301 'id': '643211870443208704',
302 'display_id': '643211948184596480',
303 'ext': 'mp4',
304 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
305 'thumbnail': r're:^https?://.*\.jpg',
306 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
307 'uploader': 'FREE THE NIPPLE',
308 'uploader_id': 'freethenipple',
309 'duration': 12.922,
310 'timestamp': 1442188653,
311 'upload_date': '20150913',
312 'uploader_url': 'https://twitter.com/freethenipple',
313 'comment_count': int,
314 'repost_count': int,
315 'like_count': int,
316 'tags': [],
317 'age_limit': 18,
318 },
319 }, {
320 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
321 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
322 'info_dict': {
323 'id': '657991469417025536',
324 'ext': 'mp4',
325 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
326 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
327 'thumbnail': r're:^https?://.*\.png',
328 'uploader': 'Gifs',
329 'uploader_id': 'giphz',
330 },
331 'expected_warnings': ['height', 'width'],
332 'skip': 'Account suspended',
333 }, {
334 'url': 'https://twitter.com/starwars/status/665052190608723968',
335 'info_dict': {
336 'id': '665052190608723968',
337 'display_id': '665052190608723968',
338 'ext': 'mp4',
339 'title': 'md5:e99588f17b3dd0503814ffb560e64731',
340 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
341 'uploader_id': 'starwars',
342 'uploader': r're:Star Wars.*',
343 'timestamp': 1447395772,
344 'upload_date': '20151113',
345 'uploader_url': 'https://twitter.com/starwars',
346 'comment_count': int,
347 'repost_count': int,
348 'like_count': int,
349 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
350 'age_limit': 0,
351 },
352 }, {
353 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
354 'info_dict': {
355 'id': '705235433198714880',
356 'ext': 'mp4',
357 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
358 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
359 'uploader_id': 'BTNBrentYarina',
360 'uploader': 'Brent Yarina',
361 'timestamp': 1456976204,
362 'upload_date': '20160303',
363 'uploader_url': 'https://twitter.com/BTNBrentYarina',
364 'comment_count': int,
365 'repost_count': int,
366 'like_count': int,
367 'tags': [],
368 'age_limit': 0,
369 },
370 'params': {
371 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
372 # Test case of TwitterCardIE
373 'skip_download': True,
374 },
375 'skip': 'Dead external link',
376 }, {
377 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
378 'info_dict': {
379 'id': '700207414000242688',
380 'display_id': '700207533655363584',
381 'ext': 'mp4',
382 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
383 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
384 'thumbnail': r're:^https?://.*\.jpg',
385 'uploader': 'jaydin donte geer',
386 'uploader_id': 'jaydingeer',
387 'duration': 30.0,
388 'timestamp': 1455777459,
389 'upload_date': '20160218',
390 'uploader_url': 'https://twitter.com/jaydingeer',
391 'comment_count': int,
392 'repost_count': int,
393 'like_count': int,
394 'tags': ['Damndaniel'],
395 'age_limit': 0,
396 },
397 }, {
398 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
399 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
400 'info_dict': {
401 'id': 'MIOxnrUteUd',
402 'ext': 'mp4',
403 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
404 'uploader': 'TAKUMA',
405 'uploader_id': '1004126642786242560',
406 'timestamp': 1402826626,
407 'upload_date': '20140615',
408 'thumbnail': r're:^https?://.*\.jpg',
409 'alt_title': 'Vine by TAKUMA',
410 'comment_count': int,
411 'repost_count': int,
412 'like_count': int,
413 'view_count': int,
414 },
415 'add_ie': ['Vine'],
416 }, {
417 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
418 'info_dict': {
419 'id': '717462543795523584',
420 'display_id': '719944021058060289',
421 'ext': 'mp4',
422 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
423 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
424 'uploader_id': 'CaptainAmerica',
425 'uploader': 'Captain America',
426 'duration': 3.17,
427 'timestamp': 1460483005,
428 'upload_date': '20160412',
429 'uploader_url': 'https://twitter.com/CaptainAmerica',
430 'thumbnail': r're:^https?://.*\.jpg',
431 'comment_count': int,
432 'repost_count': int,
433 'like_count': int,
434 'tags': [],
435 'age_limit': 0,
436 },
437 }, {
438 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
439 'info_dict': {
440 'id': '1zqKVVlkqLaKB',
441 'ext': 'mp4',
442 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
443 'upload_date': '20160923',
444 'uploader_id': '1PmKqpJdOJQoY',
445 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
446 'timestamp': 1474613214,
447 'thumbnail': r're:^https?://.*\.jpg',
448 },
449 'add_ie': ['Periscope'],
450 }, {
451 # has mp4 formats via mobile API
452 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
453 'info_dict': {
454 'id': '852138619213144067',
455 'ext': 'mp4',
456 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
457 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
458 'uploader': 'عالم الأخبار',
459 'uploader_id': 'news_al3alm',
460 'duration': 277.4,
461 'timestamp': 1492000653,
462 'upload_date': '20170412',
463 },
464 'skip': 'Account suspended',
465 }, {
466 'url': 'https://twitter.com/i/web/status/910031516746514432',
467 'info_dict': {
468 'id': '910030238373089285',
469 'display_id': '910031516746514432',
470 'ext': 'mp4',
471 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
472 'thumbnail': r're:^https?://.*\.jpg',
473 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
474 'uploader': 'Préfet de Guadeloupe',
475 'uploader_id': 'Prefet971',
476 'duration': 47.48,
477 'timestamp': 1505803395,
478 'upload_date': '20170919',
479 'uploader_url': 'https://twitter.com/Prefet971',
480 'comment_count': int,
481 'repost_count': int,
482 'like_count': int,
483 'tags': ['Maria'],
484 'age_limit': 0,
485 },
486 'params': {
487 'skip_download': True, # requires ffmpeg
488 },
489 }, {
490 # card via api.twitter.com/1.1/videos/tweet/config
491 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
492 'info_dict': {
493 'id': '1001551417340022785',
494 'display_id': '1001551623938805763',
495 'ext': 'mp4',
496 'title': 're:.*?Shep is on a roll today.*?',
497 'thumbnail': r're:^https?://.*\.jpg',
498 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
499 'uploader': 'Lis Power',
500 'uploader_id': 'LisPower1',
501 'duration': 111.278,
502 'timestamp': 1527623489,
503 'upload_date': '20180529',
504 'uploader_url': 'https://twitter.com/LisPower1',
505 'comment_count': int,
506 'repost_count': int,
507 'like_count': int,
508 'tags': [],
509 'age_limit': 0,
510 },
511 'params': {
512 'skip_download': True, # requires ffmpeg
513 },
514 }, {
515 'url': 'https://twitter.com/foobar/status/1087791357756956680',
516 'info_dict': {
517 'id': '1087791272830607360',
518 'display_id': '1087791357756956680',
519 'ext': 'mp4',
520 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
521 'thumbnail': r're:^https?://.*\.jpg',
522 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
523 'uploader': 'Twitter',
524 'uploader_id': 'Twitter',
525 'duration': 61.567,
526 'timestamp': 1548184644,
527 'upload_date': '20190122',
528 'uploader_url': 'https://twitter.com/Twitter',
529 'comment_count': int,
530 'repost_count': int,
531 'like_count': int,
532 'tags': [],
533 'age_limit': 0,
534 },
535 }, {
536 # not available in Periscope
537 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
538 'info_dict': {
539 'id': '1vOGwqejwoWxB',
540 'ext': 'mp4',
541 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
542 'uploader': 'Vivi',
543 'uploader_id': '1eVjYOLGkGrQL',
544 'thumbnail': r're:^https?://.*\.jpg',
545 'tags': ['EduTECH2019'],
546 'view_count': int,
547 },
548 'add_ie': ['TwitterBroadcast'],
549 }, {
550 # unified card
551 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
552 'info_dict': {
553 'id': '1349774757969989634',
554 'display_id': '1349794411333394432',
555 'ext': 'mp4',
556 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
557 'thumbnail': r're:^https?://.*\.jpg',
558 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
559 'uploader': 'Brooklyn Nets',
560 'uploader_id': 'BrooklynNets',
561 'duration': 324.484,
562 'timestamp': 1610651040,
563 'upload_date': '20210114',
564 'uploader_url': 'https://twitter.com/BrooklynNets',
565 'comment_count': int,
566 'repost_count': int,
567 'like_count': int,
568 'tags': [],
569 'age_limit': 0,
570 },
571 'params': {
572 'skip_download': True,
573 },
574 }, {
575 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
576 'info_dict': {
577 'id': '1577855447914409984',
578 'display_id': '1577855540407197696',
579 'ext': 'mp4',
580 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
581 'description': 'md5:b9c3699335447391d11753ab21c70a74',
582 'upload_date': '20221006',
583 'uploader': 'oshtru',
584 'uploader_id': 'oshtru',
585 'uploader_url': 'https://twitter.com/oshtru',
586 'thumbnail': r're:^https?://.*\.jpg',
587 'duration': 30.03,
588 'timestamp': 1665025050,
589 'comment_count': int,
590 'repost_count': int,
591 'like_count': int,
592 'tags': [],
593 'age_limit': 0,
594 },
595 'params': {'skip_download': True},
596 }, {
597 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
598 'info_dict': {
599 'id': '1577719286659006464',
600 'title': 'Ultima | #\u0432\u029f\u043c - Test',
601 'description': 'Test https://t.co/Y3KEZD7Dad',
602 'uploader': 'Ultima | #\u0432\u029f\u043c',
603 'uploader_id': 'UltimaShadowX',
604 'uploader_url': 'https://twitter.com/UltimaShadowX',
605 'upload_date': '20221005',
606 'timestamp': 1664992565,
607 'comment_count': int,
608 'repost_count': int,
609 'like_count': int,
610 'tags': [],
611 'age_limit': 0,
612 },
613 'playlist_count': 4,
614 'params': {'skip_download': True},
615 }, {
616 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
617 'info_dict': {
618 'id': '1575559336759263233',
619 'display_id': '1575560063510810624',
620 'ext': 'mp4',
621 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
622 'thumbnail': r're:^https?://.*\.jpg',
623 'description': 'md5:95aea692fda36a12081b9629b02daa92',
624 'uploader': 'Max Olson',
625 'uploader_id': 'MesoMax919',
626 'uploader_url': 'https://twitter.com/MesoMax919',
627 'duration': 21.321,
628 'timestamp': 1664477766,
629 'upload_date': '20220929',
630 'comment_count': int,
631 'repost_count': int,
632 'like_count': int,
633 'tags': ['HurricaneIan'],
634 'age_limit': 0,
635 },
636 }, {
637 # Adult content, uses old token
638 # Fails if not logged in (GraphQL)
639 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
640 'info_dict': {
641 'id': '1575199163847000068',
642 'display_id': '1575199173472927762',
643 'ext': 'mp4',
644 'title': str,
645 'description': str,
646 'uploader': str,
647 'uploader_id': 'Rizdraws',
648 'uploader_url': 'https://twitter.com/Rizdraws',
649 'upload_date': '20220928',
650 'timestamp': 1664391723,
651 'thumbnail': r're:^https?://.+\.jpg',
652 'like_count': int,
653 'repost_count': int,
654 'comment_count': int,
655 'age_limit': 18,
656 'tags': []
657 },
658 'expected_warnings': ['404'],
659 }, {
660 # Description is missing one https://t.co url (GraphQL)
661 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
662 'playlist_mincount': 2,
663 'info_dict': {
664 'id': '1395079556562706435',
665 'title': str,
666 'tags': [],
667 'uploader': str,
668 'like_count': int,
669 'upload_date': '20210519',
670 'age_limit': 0,
671 'repost_count': int,
672 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
673 'uploader_id': 'Srirachachau',
674 'comment_count': int,
675 'uploader_url': 'https://twitter.com/Srirachachau',
676 'timestamp': 1621447860,
677 },
678 }, {
679 # Description is missing one https://t.co url (GraphQL)
680 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
681 'playlist_mincount': 2,
682 'info_dict': {
683 'id': '1578353380363501568',
684 'title': str,
685 'uploader_id': 'DavidToons_',
686 'repost_count': int,
687 'like_count': int,
688 'uploader': str,
689 'timestamp': 1665143744,
690 'uploader_url': 'https://twitter.com/DavidToons_',
691 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
692 'tags': [],
693 'comment_count': int,
694 'upload_date': '20221007',
695 'age_limit': 0,
696 },
697 }, {
698 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
699 'playlist_count': 2,
700 'info_dict': {
701 'id': '1578401165338976258',
702 'title': str,
703 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
704 'uploader': str,
705 'uploader_id': 'primevideouk',
706 'timestamp': 1665155137,
707 'upload_date': '20221007',
708 'age_limit': 0,
709 'uploader_url': 'https://twitter.com/primevideouk',
710 'comment_count': int,
711 'repost_count': int,
712 'like_count': int,
713 'tags': ['TheRingsOfPower'],
714 },
715 }, {
716 # Twitter Spaces
717 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
718 'info_dict': {
719 'id': '1lPJqmBeeNAJb',
720 'ext': 'm4a',
721 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
722 'uploader': r're:Monique Camarra.+?',
723 'uploader_id': 'MoniqueCamarra',
724 'live_status': 'was_live',
725 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
726 'timestamp': 1658407771464,
727 },
728 'add_ie': ['TwitterSpaces'],
729 'params': {'skip_download': 'm3u8'},
730 }, {
731 # URL specifies video number but --yes-playlist
732 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
733 'playlist_mincount': 2,
734 'info_dict': {
735 'id': '1600649710662213632',
736 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
737 'timestamp': 1670459604.0,
738 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
739 'comment_count': int,
740 'uploader_id': 'CTVJLaidlaw',
741 'repost_count': int,
742 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
743 'upload_date': '20221208',
744 'age_limit': 0,
745 'uploader': 'Jocelyn Laidlaw',
746 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
747 'like_count': int,
748 },
749 }, {
750 # URL specifies video number and --no-playlist
751 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
752 'info_dict': {
753 'id': '1600649511827013632',
754 'ext': 'mp4',
755 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
756 'thumbnail': r're:^https?://.+\.jpg',
757 'timestamp': 1670459604.0,
758 'uploader_id': 'CTVJLaidlaw',
759 'uploader': 'Jocelyn Laidlaw',
760 'repost_count': int,
761 'comment_count': int,
762 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
763 'duration': 102.226,
764 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
765 'display_id': '1600649710662213632',
766 'like_count': int,
767 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
768 'upload_date': '20221208',
769 'age_limit': 0,
770 },
771 'params': {'noplaylist': True},
772 }, {
773 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
774 # note the id different between extraction and url
775 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
776 'info_dict': {
777 'id': '1621117577354424321',
778 'display_id': '1621117700482416640',
779 'ext': 'mp4',
780 'title': '뽀 - 아 최우제 이동속도 봐',
781 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
782 'duration': 24.598,
783 'uploader': '뽀',
784 'uploader_id': 's2FAKER',
785 'uploader_url': 'https://twitter.com/s2FAKER',
786 'upload_date': '20230202',
787 'timestamp': 1675339553.0,
788 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
789 'age_limit': 18,
790 'tags': [],
791 'like_count': int,
792 'repost_count': int,
793 'comment_count': int,
794 },
795 }, {
796 # onion route
797 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
798 'only_matching': True,
799 }, {
800 # Twitch Clip Embed
801 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
802 'only_matching': True,
803 }, {
804 # promo_video_website card
805 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
806 'only_matching': True,
807 }, {
808 # promo_video_convo card
809 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
810 'only_matching': True,
811 }, {
812 # appplayer card
813 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
814 'only_matching': True,
815 }, {
816 # video_direct_message card
817 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
818 'only_matching': True,
819 }, {
820 # poll2choice_video card
821 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
822 'only_matching': True,
823 }, {
824 # poll3choice_video card
825 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
826 'only_matching': True,
827 }, {
828 # poll4choice_video card
829 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
830 'only_matching': True,
831 }]
832
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL TweetDetail payload into a legacy-style status dict.

    The entry whose entryId is `tweet-<twid>` is located, its 'legacy' dict
    is taken as the status, and the user, card and quoted status 'legacy'
    dicts are merged in under 'user', 'card' and 'quoted_status'.

    @raises ExtractorError  if the located result contains a 'tombstone'
    """
    # The trailing ('tweet', None) branch takes the nested 'tweet' object when
    # there is one, and otherwise the result itself
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    )
    result = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    typename = result.get('__typename')
    if typename not in ('Tweet', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    status = result.get('legacy', {})
    nested_legacy = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_legacy)

    # extra transformation is needed since result does not match legacy format:
    # the card's binding values are re-keyed from key/value entries into a mapping
    binding_values = {}
    for entry in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict):
        binding_values[entry.get('key')] = entry.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
863
864 def _build_graphql_query(self, media_id):
865 return {
866 'variables': {
867 'focalTweetId': media_id,
868 'includePromotedContent': True,
869 'with_rux_injections': False,
870 'withBirdwatchNotes': True,
871 'withCommunity': True,
872 'withDownvotePerspective': False,
873 'withQuickPromoteEligibilityTweetFields': True,
874 'withReactionsMetadata': False,
875 'withReactionsPerspective': False,
876 'withSuperFollowsTweetFields': True,
877 'withSuperFollowsUserFields': True,
878 'withV2Timeline': True,
879 'withVoice': True,
880 },
881 'features': {
882 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
883 'interactive_text_enabled': True,
884 'responsive_web_edit_tweet_api_enabled': True,
885 'responsive_web_enhance_cards_enabled': True,
886 'responsive_web_graphql_timeline_navigation_enabled': False,
887 'responsive_web_text_conversations_enabled': False,
888 'responsive_web_uc_gql_enabled': True,
889 'standardized_nudges_misinfo': True,
890 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
891 'tweetypie_unmention_optimization_enabled': True,
892 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
893 'verified_phone_label_enabled': False,
894 'vibe_api_enabled': True,
895 },
896 }
897
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet at *url*.

    The tweet is fetched through the GraphQL API when logged in or when the
    'force_graphql' extractor argument is set, otherwise through the
    'statuses/show' endpoint. Entries are collected from the non-photo media
    of the tweet and of its quoted tweet, and from the tweet's card.

    Returns a single entry, a playlist of entries, or - when the tweet has
    no media of its own - a url_result for the first expanded URL in it.

    Raises ExtractorError when no video can be found, or when the video
    number given in the URL does not exist.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self.is_logged_in or self._configuration_arg('force_graphql'):
        self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)
    else:
        status = self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        })

    title = description = status['full_text'].replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Keep only hashtags that actually carry text
    hashtags = try_get(status, lambda x: x['entities']['hashtags'], list) or []
    tags = [hashtag['text'] for hashtag in hashtags if hashtag.get('text')]

    # Tweet-level metadata shared by every entry
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': tags,
    }

    def extract_from_video_info(media):
        """Build an entry (formats, subtitles, thumbnails) from one media object."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        # `or []` also covers an explicit null in the API response
        for variant in video_info.get('variants') or []:
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            # `or {}` mirrors the handling of 'original_info' below
            for name, size in (media.get('sizes') or {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card; yields nothing if there is no card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is {'type': T, '<t>_value': ...}; return the payload or None
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the tweet it quotes
    media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
    videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
    cards = extract_from_card_info(status.get('card'))
    entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]

    if not entries:
        # No media in the tweet: fall back to the first link it contains
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            raise ExtractorError('No video could be found in this tweet', expected=True)

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        # The number in the URL is 1-based. Reject 0 as well as too-large
        # values: index -1 would otherwise silently select the last entry.
        index = int(selected_index) - 1
        if not 0 <= index < len(entries):
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)

        return entries[index]

    if len(entries) == 1:
        return entries[0]

    # Disambiguate the entries of a multi-video tweet
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1083
1084
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for videos hosted on amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's twitter:* meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the twitter:<target>:width / :height meta tags as ints (None if absent)
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # _extract_formats_from_vmap_url returns an empty list when no vmap
        # URL is available; indexing it unconditionally raised IndexError.
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1140
1141
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> URLs."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        # Prefer the direct playback URL; 'location' is the mandatory fallback
        m3u8_url = source.get('noRedirectPlaybackUrl')
        if not m3u8_url:
            m3u8_url = source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the stream URL, if any, names the formats
        stream_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = stream_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1178
1179
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for twitter.com/i/spaces/<id> URLs (audio-only)."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased 'state' of a space to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Build the GraphQL query payload for the space *space_id*."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Fetch the space's metadata and, when it is playable, its audio formats.

        Raises ExtractorError if the space does not exist. Spaces that have
        not started, or have ended but are not downloadable yet, are reported
        through raise_no_formats and returned without formats.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # None when the state is missing or not one of the known values
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # 'created_at' is in milliseconds (e.g. 1659877956397), while the
            # 'timestamp' field is expected in seconds
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1272
1273
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve t.co short links (or 'tco:<code>' pseudo-URLs) to their target."""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to the matching extractor."""
        shortcode, eid = self._match_valid_url(url).group('id', 'eid')
        if eid:
            # 'tco:<code>' form: build the real short URL from the code
            shortcode = eid
            url = self._BASE_URL + shortcode
        # NOTE(review): the curl User-Agent presumably makes t.co answer with a
        # plain HTTP redirect - confirm before changing it
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).geturl()

        # Strip only the leading interstitial prefix, leaving the wrapped link intact
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)