]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[extractor/generic] Handle basic-auth when checking redirects
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 import json
2 import re
3 import urllib.error
4
5 from .common import InfoExtractor
6 from .periscope import PeriscopeBaseIE, PeriscopeIE
7 from ..compat import functools # isort: split
8 from ..compat import (
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..utils import (
14 ExtractorError,
15 dict_get,
16 float_or_none,
17 format_field,
18 int_or_none,
19 make_archive_id,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
29 )
30
31
class TwitterBaseIE(InfoExtractor):
    """Common base for the Twitter extractors.

    Holds the API endpoints and bearer tokens, and provides helpers for turning
    media variants into formats and for querying the REST and GraphQL APIs.
    """

    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # bearer token -> guest token activated for it (None until _call_api fetches one).
    # _call_api tries the tokens in this order.
    _TOKENS = {
        'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
        'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
    }
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'

    def _extract_variant_formats(self, variant, video_id):
        """Return a ``(formats, subtitles)`` pair for one media variant.

        A variant without a URL yields ``([], {})``. URLs containing ``.m3u8``
        are expanded through the HLS helper (non-fatally); anything else
        becomes a single ``http`` format.
        """
        media_url = variant.get('url')
        if not media_url:
            return [], {}

        if '.m3u8' in media_url:
            return self._extract_m3u8_formats_and_subtitles(
                media_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)

        # the bitrate is scaled down by 1000; a zero or missing value becomes None
        bitrate = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
        id_suffix = '-%d' % bitrate if bitrate else ''
        http_format = {
            'url': media_url,
            'format_id': 'http' + id_suffix,
            'tbr': bitrate,
        }
        self._search_dimensions_in_video_url(http_format, media_url)
        return [http_format], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and return ``(formats, subtitles)`` for it.

        Every ``videoVariant`` element is processed, followed by the
        ``MediaFile`` URL unless one of the variants already provided it.
        Returns ``([], {})`` when ``vmap_url`` is not a valid URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}

        vmap_doc = self._download_xml(vmap_url, video_id)
        formats, subtitles, seen_urls = [], {}, []

        for node in vmap_doc.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            attrs = node.attrib
            # the unquoted URL is stored back on the element before it is processed
            attrs['url'] = compat_urllib_parse_unquote(attrs['url'])
            seen_urls.append(attrs['url'])
            variant_formats, variant_subs = self._extract_variant_formats(attrs, video_id)
            formats.extend(variant_formats)
            subtitles = self._merge_subtitles(subtitles, variant_subs)

        media_file_url = strip_or_none(xpath_text(vmap_doc, './/MediaFile'))
        if media_file_url in seen_urls:
            return formats, subtitles

        extra_formats, extra_subs = self._extract_variant_formats({'url': media_file_url}, video_id)
        formats.extend(extra_formats)
        subtitles = self._merge_subtitles(subtitles, extra_subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` if ``video_url`` has a ``/<W>x<H>/`` path part."""
        mobj = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if not mobj:
            return
        a_format['width'] = int(mobj.group('width'))
        a_format['height'] = int(mobj.group('height'))

    @functools.cached_property
    def is_logged_in(self):
        """True when the cookies for the API host include an ``auth_token`` cookie."""
        auth_cookie = self._get_cookies(self._API_BASE).get('auth_token')
        return bool(auth_cookie)

    def _call_api(self, path, video_id, query={}, graphql=False):
        """Query the REST (default) or GraphQL API and return the decoded JSON.

        Each bearer token in ``_TOKENS`` is tried in order, with at most two
        attempts per token. When not logged in, a guest token is activated for
        the bearer token if none is stored yet; an expired guest token is
        refreshed once. A 404 download error on one bearer token emits a
        warning and moves on to the next one.

        Raises ExtractorError if no guest token can be obtained, if the API
        response carries errors, or if the download itself fails.
        """
        headers = {}

        csrf_cookie = self._get_cookies(self._API_BASE).get('ct0')
        if csrf_cookie:
            headers['x-csrf-token'] = csrf_cookie.value

        if self.is_logged_in:
            headers['x-twitter-auth-type'] = 'OAuth2Session'
            headers['x-twitter-client-language'] = 'en'
            headers['x-twitter-active-user'] = 'yes'

        pending_404 = None
        for bearer_token in self._TOKENS:
            for first_attempt in (True, False):
                headers['Authorization'] = f'Bearer {bearer_token}'

                if not self.is_logged_in:
                    if not self._TOKENS[bearer_token]:
                        # the activation request must not carry a previous guest token
                        headers.pop('x-guest-token', None)
                        activation = self._download_json(
                            self._API_BASE + 'guest/activate.json', video_id,
                            'Downloading guest token', data=b'', headers=headers)

                        guest_token = activation.get('guest_token')
                        self._TOKENS[bearer_token] = guest_token
                        if not guest_token:
                            raise ExtractorError('Could not retrieve guest token')

                    headers['x-guest-token'] = self._TOKENS[bearer_token]

                api_base = self._GRAPHQL_API_BASE if graphql else self._API_BASE
                # passed as expected_status; presumably these responses are parsed
                # instead of raising - confirm against _download_json
                tolerated_statuses = {400, 403, 404} if graphql else {403}
                try:
                    result = self._download_json(
                        api_base + path, video_id, headers=headers,
                        query=query, expected_status=tolerated_statuses)

                except ExtractorError as err:
                    # a failure after an earlier 404 reports that first 404 instead
                    if pending_404:
                        raise pending_404

                    cause = err.cause
                    if not (isinstance(cause, urllib.error.HTTPError) and cause.code == 404):
                        raise

                    pending_404 = err
                    self.report_warning(
                        'Twitter API gave 404 response, retrying with deprecated auth token. '
                        'Only one media item can be extracted')
                    break  # continue outer loop with next bearer_token

                if not result.get('errors'):
                    return result

                errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
                if first_attempt and any('bad guest token' in error.lower() for error in errors):
                    self.to_screen('Guest token has expired. Refreshing guest token')
                    # clearing the stored token forces a fresh activation on the retry
                    self._TOKENS[bearer_token] = None
                    continue

                error_message = ', '.join(set(errors)) or 'Unknown error'
                raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for ``media_id``; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL endpoint and return the ``data`` member of its response.

        Each value produced by ``_build_graphql_query`` is serialised as
        compact JSON and sent as a query parameter.
        """
        query = {
            name: json.dumps(payload, separators=(',', ':'))
            for name, payload in self._build_graphql_query(media_id).items()
        }
        response = self._call_api(endpoint, media_id, query=query, graphql=True)
        return traverse_obj(response, 'data')
166
167
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card and ``/i/videos`` URLs.

    It only resolves the numeric ID from the URL and hands the corresponding
    status URL over to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'}
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
            'info_dict': {
                'id': 'iBb2x00UVlv',
                'ext': 'mp4',
                'upload_date': '20151113',
                'uploader_id': '1189339351084113920',
                'uploader': 'ArsenalTerje',
                'title': 'Vine by ArsenalTerje',
                'timestamp': 1447451307,
                'alt_title': 'Vine by ArsenalTerje',
                'comment_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://[^?#]+\.jpg',
                'view_count': int,
                'repost_count': int,
            },
            'add_ie': ['Vine'],
            'params': {'skip_download': 'm3u8'},
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return a URL result pointing TwitterIE at the status with the matched ID."""
        tweet_id = self._match_id(url)
        status_url = 'https://twitter.com/statuses/' + tweet_id
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
292
293
294 class TwitterIE(TwitterBaseIE):
295 IE_NAME = 'twitter'
296 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
297
298 _TESTS = [{
299 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
300 'info_dict': {
301 'id': '643211870443208704',
302 'display_id': '643211948184596480',
303 'ext': 'mp4',
304 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
305 'thumbnail': r're:^https?://.*\.jpg',
306 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
307 'uploader': 'FREE THE NIPPLE',
308 'uploader_id': 'freethenipple',
309 'duration': 12.922,
310 'timestamp': 1442188653,
311 'upload_date': '20150913',
312 'uploader_url': 'https://twitter.com/freethenipple',
313 'comment_count': int,
314 'repost_count': int,
315 'like_count': int,
316 'tags': [],
317 'age_limit': 18,
318 },
319 }, {
320 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
321 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
322 'info_dict': {
323 'id': '657991469417025536',
324 'ext': 'mp4',
325 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
326 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
327 'thumbnail': r're:^https?://.*\.png',
328 'uploader': 'Gifs',
329 'uploader_id': 'giphz',
330 },
331 'expected_warnings': ['height', 'width'],
332 'skip': 'Account suspended',
333 }, {
334 'url': 'https://twitter.com/starwars/status/665052190608723968',
335 'info_dict': {
336 'id': '665052190608723968',
337 'display_id': '665052190608723968',
338 'ext': 'mp4',
339 'title': r're:Star Wars.*A new beginning is coming December 18.*',
340 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
341 'uploader_id': 'starwars',
342 'uploader': r're:Star Wars.*',
343 'timestamp': 1447395772,
344 'upload_date': '20151113',
345 'uploader_url': 'https://twitter.com/starwars',
346 'comment_count': int,
347 'repost_count': int,
348 'like_count': int,
349 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
350 'age_limit': 0,
351 },
352 }, {
353 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
354 'info_dict': {
355 'id': '705235433198714880',
356 'ext': 'mp4',
357 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
358 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
359 'uploader_id': 'BTNBrentYarina',
360 'uploader': 'Brent Yarina',
361 'timestamp': 1456976204,
362 'upload_date': '20160303',
363 'uploader_url': 'https://twitter.com/BTNBrentYarina',
364 'comment_count': int,
365 'repost_count': int,
366 'like_count': int,
367 'tags': [],
368 'age_limit': 0,
369 },
370 'params': {
371 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
372 # Test case of TwitterCardIE
373 'skip_download': True,
374 },
375 'skip': 'Dead external link',
376 }, {
377 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
378 'info_dict': {
379 'id': '700207414000242688',
380 'display_id': '700207533655363584',
381 'ext': 'mp4',
382 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
383 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
384 'thumbnail': r're:^https?://.*\.jpg',
385 'uploader': 'jaydin donte geer',
386 'uploader_id': 'jaydingeer',
387 'duration': 30.0,
388 'timestamp': 1455777459,
389 'upload_date': '20160218',
390 'uploader_url': 'https://twitter.com/jaydingeer',
391 'comment_count': int,
392 'repost_count': int,
393 'like_count': int,
394 'tags': ['Damndaniel'],
395 'age_limit': 0,
396 },
397 }, {
398 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
399 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
400 'info_dict': {
401 'id': 'MIOxnrUteUd',
402 'ext': 'mp4',
403 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
404 'uploader': 'TAKUMA',
405 'uploader_id': '1004126642786242560',
406 'timestamp': 1402826626,
407 'upload_date': '20140615',
408 'thumbnail': r're:^https?://.*\.jpg',
409 'alt_title': 'Vine by TAKUMA',
410 'comment_count': int,
411 'repost_count': int,
412 'like_count': int,
413 'view_count': int,
414 },
415 'add_ie': ['Vine'],
416 }, {
417 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
418 'info_dict': {
419 'id': '717462543795523584',
420 'display_id': '719944021058060289',
421 'ext': 'mp4',
422 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
423 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
424 'uploader_id': 'CaptainAmerica',
425 'uploader': 'Captain America',
426 'duration': 3.17,
427 'timestamp': 1460483005,
428 'upload_date': '20160412',
429 'uploader_url': 'https://twitter.com/CaptainAmerica',
430 'thumbnail': r're:^https?://.*\.jpg',
431 'comment_count': int,
432 'repost_count': int,
433 'like_count': int,
434 'tags': [],
435 'age_limit': 0,
436 },
437 }, {
438 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
439 'info_dict': {
440 'id': '1zqKVVlkqLaKB',
441 'ext': 'mp4',
442 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
443 'upload_date': '20160923',
444 'uploader_id': '1PmKqpJdOJQoY',
445 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
446 'timestamp': 1474613214,
447 'thumbnail': r're:^https?://.*\.jpg',
448 },
449 'add_ie': ['Periscope'],
450 }, {
451 # has mp4 formats via mobile API
452 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
453 'info_dict': {
454 'id': '852138619213144067',
455 'ext': 'mp4',
456 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
457 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
458 'uploader': 'عالم الأخبار',
459 'uploader_id': 'news_al3alm',
460 'duration': 277.4,
461 'timestamp': 1492000653,
462 'upload_date': '20170412',
463 },
464 'skip': 'Account suspended',
465 }, {
466 'url': 'https://twitter.com/i/web/status/910031516746514432',
467 'info_dict': {
468 'id': '910030238373089285',
469 'display_id': '910031516746514432',
470 'ext': 'mp4',
471 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
472 'thumbnail': r're:^https?://.*\.jpg',
473 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
474 'uploader': 'Préfet de Guadeloupe',
475 'uploader_id': 'Prefet971',
476 'duration': 47.48,
477 'timestamp': 1505803395,
478 'upload_date': '20170919',
479 'uploader_url': 'https://twitter.com/Prefet971',
480 'comment_count': int,
481 'repost_count': int,
482 'like_count': int,
483 'tags': ['Maria'],
484 'age_limit': 0,
485 },
486 'params': {
487 'skip_download': True, # requires ffmpeg
488 },
489 }, {
490 # card via api.twitter.com/1.1/videos/tweet/config
491 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
492 'info_dict': {
493 'id': '1001551417340022785',
494 'display_id': '1001551623938805763',
495 'ext': 'mp4',
496 'title': 're:.*?Shep is on a roll today.*?',
497 'thumbnail': r're:^https?://.*\.jpg',
498 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
499 'uploader': 'Lis Power',
500 'uploader_id': 'LisPower1',
501 'duration': 111.278,
502 'timestamp': 1527623489,
503 'upload_date': '20180529',
504 'uploader_url': 'https://twitter.com/LisPower1',
505 'comment_count': int,
506 'repost_count': int,
507 'like_count': int,
508 'tags': [],
509 'age_limit': 0,
510 },
511 'params': {
512 'skip_download': True, # requires ffmpeg
513 },
514 }, {
515 'url': 'https://twitter.com/foobar/status/1087791357756956680',
516 'info_dict': {
517 'id': '1087791272830607360',
518 'display_id': '1087791357756956680',
519 'ext': 'mp4',
520 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
521 'thumbnail': r're:^https?://.*\.jpg',
522 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
523 'uploader': 'Twitter',
524 'uploader_id': 'Twitter',
525 'duration': 61.567,
526 'timestamp': 1548184644,
527 'upload_date': '20190122',
528 'uploader_url': 'https://twitter.com/Twitter',
529 'comment_count': int,
530 'repost_count': int,
531 'like_count': int,
532 'tags': [],
533 'age_limit': 0,
534 },
535 }, {
536 # not available in Periscope
537 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
538 'info_dict': {
539 'id': '1vOGwqejwoWxB',
540 'ext': 'mp4',
541 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
542 'uploader': 'Vivi',
543 'uploader_id': '1eVjYOLGkGrQL',
544 'thumbnail': r're:^https?://.*\.jpg',
545 'tags': ['EduTECH2019'],
546 'view_count': int,
547 },
548 'add_ie': ['TwitterBroadcast'],
549 }, {
550 # unified card
551 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
552 'info_dict': {
553 'id': '1349774757969989634',
554 'display_id': '1349794411333394432',
555 'ext': 'mp4',
556 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
557 'thumbnail': r're:^https?://.*\.jpg',
558 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
559 'uploader': 'Brooklyn Nets',
560 'uploader_id': 'BrooklynNets',
561 'duration': 324.484,
562 'timestamp': 1610651040,
563 'upload_date': '20210114',
564 'uploader_url': 'https://twitter.com/BrooklynNets',
565 'comment_count': int,
566 'repost_count': int,
567 'like_count': int,
568 'tags': [],
569 'age_limit': 0,
570 },
571 'params': {
572 'skip_download': True,
573 },
574 }, {
575 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
576 'info_dict': {
577 'id': '1577855447914409984',
578 'display_id': '1577855540407197696',
579 'ext': 'mp4',
580 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
581 'description': 'md5:b9c3699335447391d11753ab21c70a74',
582 'upload_date': '20221006',
583 'uploader': 'oshtru',
584 'uploader_id': 'oshtru',
585 'uploader_url': 'https://twitter.com/oshtru',
586 'thumbnail': r're:^https?://.*\.jpg',
587 'duration': 30.03,
588 'timestamp': 1665025050,
589 'comment_count': int,
590 'repost_count': int,
591 'like_count': int,
592 'tags': [],
593 'age_limit': 0,
594 },
595 'params': {'skip_download': True},
596 }, {
597 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
598 'info_dict': {
599 'id': '1577719286659006464',
600 'title': 'Ultima | #\u0432\u029f\u043c - Test',
601 'description': 'Test https://t.co/Y3KEZD7Dad',
602 'uploader': 'Ultima | #\u0432\u029f\u043c',
603 'uploader_id': 'UltimaShadowX',
604 'uploader_url': 'https://twitter.com/UltimaShadowX',
605 'upload_date': '20221005',
606 'timestamp': 1664992565,
607 'comment_count': int,
608 'repost_count': int,
609 'like_count': int,
610 'tags': [],
611 'age_limit': 0,
612 },
613 'playlist_count': 4,
614 'params': {'skip_download': True},
615 }, {
616 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
617 'info_dict': {
618 'id': '1575559336759263233',
619 'display_id': '1575560063510810624',
620 'ext': 'mp4',
621 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
622 'thumbnail': r're:^https?://.*\.jpg',
623 'description': 'md5:95aea692fda36a12081b9629b02daa92',
624 'uploader': 'Max Olson',
625 'uploader_id': 'MesoMax919',
626 'uploader_url': 'https://twitter.com/MesoMax919',
627 'duration': 21.321,
628 'timestamp': 1664477766,
629 'upload_date': '20220929',
630 'comment_count': int,
631 'repost_count': int,
632 'like_count': int,
633 'tags': ['HurricaneIan'],
634 'age_limit': 0,
635 },
636 }, {
637 # Adult content, uses old token
638 # Fails if not logged in (GraphQL)
639 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
640 'info_dict': {
641 'id': '1575199163847000068',
642 'display_id': '1575199173472927762',
643 'ext': 'mp4',
644 'title': str,
645 'description': str,
646 'uploader': str,
647 'uploader_id': 'Rizdraws',
648 'uploader_url': 'https://twitter.com/Rizdraws',
649 'upload_date': '20220928',
650 'timestamp': 1664391723,
651 'thumbnail': r're:^https?://.+\.jpg',
652 'like_count': int,
653 'repost_count': int,
654 'comment_count': int,
655 'age_limit': 18,
656 'tags': []
657 },
658 'expected_warnings': ['404'],
659 }, {
660 # Description is missing one https://t.co url (GraphQL)
661 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
662 'playlist_mincount': 2,
663 'info_dict': {
664 'id': '1395079556562706435',
665 'title': str,
666 'tags': [],
667 'uploader': str,
668 'like_count': int,
669 'upload_date': '20210519',
670 'age_limit': 0,
671 'repost_count': int,
672 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
673 'uploader_id': 'Srirachachau',
674 'comment_count': int,
675 'uploader_url': 'https://twitter.com/Srirachachau',
676 'timestamp': 1621447860,
677 },
678 }, {
679 # Description is missing one https://t.co url (GraphQL)
680 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
681 'playlist_mincount': 2,
682 'info_dict': {
683 'id': '1578353380363501568',
684 'title': str,
685 'uploader_id': 'DavidToons_',
686 'repost_count': int,
687 'like_count': int,
688 'uploader': str,
689 'timestamp': 1665143744,
690 'uploader_url': 'https://twitter.com/DavidToons_',
691 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
692 'tags': [],
693 'comment_count': int,
694 'upload_date': '20221007',
695 'age_limit': 0,
696 },
697 }, {
698 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
699 'playlist_count': 2,
700 'info_dict': {
701 'id': '1578401165338976258',
702 'title': str,
703 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
704 'uploader': str,
705 'uploader_id': 'primevideouk',
706 'timestamp': 1665155137,
707 'upload_date': '20221007',
708 'age_limit': 0,
709 'uploader_url': 'https://twitter.com/primevideouk',
710 'comment_count': int,
711 'repost_count': int,
712 'like_count': int,
713 'tags': ['TheRingsOfPower'],
714 },
715 }, {
716 # Twitter Spaces
717 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
718 'info_dict': {
719 'id': '1lPJqmBeeNAJb',
720 'ext': 'm4a',
721 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
722 'uploader': r're:Monique Camarra.+?',
723 'uploader_id': 'MoniqueCamarra',
724 'live_status': 'was_live',
725 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
726 'timestamp': 1658407771464,
727 },
728 'add_ie': ['TwitterSpaces'],
729 'params': {'skip_download': 'm3u8'},
730 }, {
731 # URL specifies video number but --yes-playlist
732 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
733 'playlist_mincount': 2,
734 'info_dict': {
735 'id': '1600649710662213632',
736 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
737 'timestamp': 1670459604.0,
738 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
739 'comment_count': int,
740 'uploader_id': 'CTVJLaidlaw',
741 'repost_count': int,
742 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
743 'upload_date': '20221208',
744 'age_limit': 0,
745 'uploader': 'Jocelyn Laidlaw',
746 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
747 'like_count': int,
748 },
749 }, {
750 # URL specifies video number and --no-playlist
751 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
752 'info_dict': {
753 'id': '1600649511827013632',
754 'ext': 'mp4',
755 'title': 'md5:dac4f4d4c591fcc4e88a253eba472dc3',
756 'thumbnail': r're:^https?://.+\.jpg',
757 'timestamp': 1670459604.0,
758 'uploader_id': 'CTVJLaidlaw',
759 'uploader': 'Jocelyn Laidlaw',
760 'repost_count': int,
761 'comment_count': int,
762 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
763 'duration': 102.226,
764 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
765 'display_id': '1600649710662213632',
766 'like_count': int,
767 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
768 'upload_date': '20221208',
769 'age_limit': 0,
770 },
771 'params': {'noplaylist': True},
772 }, {
773 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
774 # note the id different between extraction and url
775 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
776 'info_dict': {
777 'id': '1621117577354424321',
778 'display_id': '1621117700482416640',
779 'ext': 'mp4',
780 'title': '뽀 - 아 최우제 이동속도 봐',
781 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
782 'duration': 24.598,
783 'uploader': '뽀',
784 'uploader_id': 's2FAKER',
785 'uploader_url': 'https://twitter.com/s2FAKER',
786 'upload_date': '20230202',
787 'timestamp': 1675339553.0,
788 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
789 'age_limit': 18,
790 'tags': [],
791 'like_count': int,
792 'repost_count': int,
793 'comment_count': int,
794 },
795 }, {
796 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
797 'info_dict': {
798 'id': '1599108643743473680',
799 'display_id': '1599108751385972737',
800 'ext': 'mp4',
801 'title': '\u06ea - \U0001F48B',
802 'uploader_url': 'https://twitter.com/hlo_again',
803 'like_count': int,
804 'uploader_id': 'hlo_again',
805 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
806 'repost_count': int,
807 'duration': 9.531,
808 'comment_count': int,
809 'upload_date': '20221203',
810 'age_limit': 0,
811 'timestamp': 1670092210.0,
812 'tags': [],
813 'uploader': '\u06ea',
814 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
815 },
816 'params': {'noplaylist': True},
817 }, {
818 # Media view count is GraphQL only, force in test
819 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
820 'info_dict': {
821 'id': '1600009362759733248',
822 'display_id': '1600009574919962625',
823 'ext': 'mp4',
824 'uploader_url': 'https://twitter.com/MunTheShinobi',
825 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
826 'view_count': int,
827 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
828 'age_limit': 0,
829 'uploader': 'Mün The Shinobi | BlaqBoi\'s Therapist',
830 'repost_count': int,
831 'upload_date': '20221206',
832 'title': 'Mün The Shinobi | BlaqBoi\'s Therapist - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
833 'comment_count': int,
834 'like_count': int,
835 'tags': [],
836 'uploader_id': 'MunTheShinobi',
837 'duration': 139.987,
838 'timestamp': 1670306984.0,
839 },
840 'params': {'extractor_args': {'twitter': {'force_graphql': ['']}}},
841 }, {
842 # onion route
843 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
844 'only_matching': True,
845 }, {
846 # Twitch Clip Embed
847 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
848 'only_matching': True,
849 }, {
850 # promo_video_website card
851 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
852 'only_matching': True,
853 }, {
854 # promo_video_convo card
855 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
856 'only_matching': True,
857 }, {
858 # appplayer card
859 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
860 'only_matching': True,
861 }, {
862 # video_direct_message card
863 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
864 'only_matching': True,
865 }, {
866 # poll2choice_video card
867 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
868 'only_matching': True,
869 }, {
870 # poll3choice_video card
871 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
872 'only_matching': True,
873 }, {
874 # poll4choice_video card
875 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
876 'only_matching': True,
877 }]
878
def _graphql_to_legacy(self, data, twid):
    """Flatten the GraphQL response ``data`` for tweet ``twid`` into a legacy-style status dict.

    The tweet's ``legacy`` dict (an empty dict when absent) is updated in
    place with ``user``, ``card`` and ``quoted_status`` entries and returned.
    Raises ExtractorError when the matched result is a tombstone.
    """
    entry_id = f'tweet-{twid}'
    # ('tweet', None): use the nested 'tweet' dict when the result has one, else the result itself
    tweet = traverse_obj(data, (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == entry_id, 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    ), expected_type=dict, default={}, get_all=False)

    typename = tweet.get('__typename')
    if typename not in ('Tweet', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in tweet:
        reason = traverse_obj(tweet, ('tombstone', 'text', 'text'), expected_type=str)
        raise ExtractorError(f'Twitter API says: {reason or "Unknown error"}', expected=True)

    status = tweet.get('legacy', {})
    nested_parts = traverse_obj(tweet, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_parts)

    # the card's binding values are rebuilt as a key -> value mapping,
    # since the GraphQL result does not match the legacy format
    card_bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
909
910 def _build_graphql_query(self, media_id):
911 return {
912 'variables': {
913 'focalTweetId': media_id,
914 'includePromotedContent': True,
915 'with_rux_injections': False,
916 'withBirdwatchNotes': True,
917 'withCommunity': True,
918 'withDownvotePerspective': False,
919 'withQuickPromoteEligibilityTweetFields': True,
920 'withReactionsMetadata': False,
921 'withReactionsPerspective': False,
922 'withSuperFollowsTweetFields': True,
923 'withSuperFollowsUserFields': True,
924 'withV2Timeline': True,
925 'withVoice': True,
926 },
927 'features': {
928 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
929 'interactive_text_enabled': True,
930 'responsive_web_edit_tweet_api_enabled': True,
931 'responsive_web_enhance_cards_enabled': True,
932 'responsive_web_graphql_timeline_navigation_enabled': False,
933 'responsive_web_text_conversations_enabled': False,
934 'responsive_web_uc_gql_enabled': True,
935 'standardized_nudges_misinfo': True,
936 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
937 'tweetypie_unmention_optimization_enabled': True,
938 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
939 'verified_phone_label_enabled': False,
940 'vibe_api_enabled': True,
941 },
942 }
943
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet at *url*.

    The tweet is fetched through the GraphQL API when logged in or when the
    ``force_graphql`` extractor argument is set, and through the
    ``statuses/show`` endpoint otherwise. Media attached to the tweet (and
    to a quoted tweet) as well as card media are turned into entries.

    Returns a single info dict, a playlist result when several entries were
    found, or a URL result pointing at the first expanded URL of the tweet
    when no media was found.

    Raises ExtractorError when the requested media index is unavailable or
    not a video, or when no video could be found at all.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self.is_logged_in or self._configuration_arg('force_graphql'):
        self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    else:
        status = self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        })

    # The status may lack 'full_text' (e.g. an empty GraphQL result); fall back
    # to an empty text instead of crashing with a bare KeyError
    title = description = (status.get('full_text') or '').replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the media-specific part of an info dict from one media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield entries (URL results or info dict parts) described by a tweet card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is typed; its payload lives under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the quoted tweet, if any
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        media_index = int(selected_index) - 1
        # A negative index (URL index 0) would wrap around and silently pick
        # the last media item, so treat it as unavailable instead
        desired_obj = None if media_index < 0 else traverse_obj(
            status, ('extended_entities', 'media', media_index, {dict}))
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media: hand over to whatever the tweet links to, unless that
        # would just lead back to this very URL
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            raise ExtractorError('No video could be found in this tweet', expected=True)

        return self.url_result(expanded_url, display_id=twid, **info)

    # The first entry additionally keeps the tweet ID as an archive id
    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1136
1137
# Extractor for videos on amp.twimg.com/v/<id> pages; formats come from the
# VMAP URL advertised in the page's 'twitter:amplify:vmap' meta tag.
class TwitterAmplifyIE(TwitterBaseIE):
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page at *url* and build an info dict from its meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            """Return (width, height) from the 'twitter:<target>:*' meta tags."""
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # The format list may be empty (e.g. no usable VMAP URL on the page);
        # only annotate the first format when there is one instead of raising
        # an IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
1193
1194
# Extractor for twitter.com/i/broadcasts/<id> URLs, built on the Periscope helpers.
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        # Prefer the non-redirecting playback URL; 'location' is the mandatory fallback
        m3u8_url = source.get('noRedirectPlaybackUrl')
        if not m3u8_url:
            m3u8_url = source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the stream URL (if any) names the format
        query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1231
1232
# Extractor for Twitter Spaces (twitter.com/i/spaces/<id>), which are audio-only streams.
class TwitterSpacesIE(TwitterBaseIE):
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased space state reported by the API to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for a GraphQL query on *space_id*."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Fetch the space's metadata and, unless upcoming or not yet downloadable, its audio formats.

        Raises ExtractorError when the API returns no data for the space.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # None when the state is missing or not one of the known SPACE_STATUS keys
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # 'created_at' is in milliseconds; 'timestamp' must be in seconds
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1325
1326
# Resolves t.co short links (and the internal 'tco:<code>' form) to their target URL.
class TwitterShortenerIE(TwitterBaseIE):
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host 't.co' matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate extraction to the URL it resolves to."""
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # 'tco:<code>' form: rebuild the real short URL from the code
            shortcode = eid
            url = self._BASE_URL + shortcode
        # NOTE(review): the curl User-Agent presumably makes t.co answer with a
        # plain HTTP redirect - confirm
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop the interstitial warning page and keep only the wrapped link
            new_url = new_url.replace(unsafe_link_prefix, '')
        return self.url_result(new_url)