]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[extractor/rokfin] Re-construct manifest url (#6507)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
23e7cba8 2import re
7a26ce26 3import urllib.error
23e7cba8
S
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
7a26ce26 7from ..compat import functools # isort: split
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
23e7cba8 13from ..utils import (
2edfd745 14 ExtractorError,
13b2ae29 15 dict_get,
23e7cba8 16 float_or_none,
13b2ae29 17 format_field,
cf5881fc 18 int_or_none,
13b2ae29
SS
19 make_archive_id,
20 str_or_none,
21 strip_or_none,
f1150b9e 22 traverse_obj,
7a26ce26 23 try_call,
2edfd745 24 try_get,
18ca61c5
RA
25 unified_timestamp,
26 update_url_query,
41d1cca3 27 url_or_none,
2edfd745 28 xpath_text,
23e7cba8
S
29)
30
31
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors.

    Provides format extraction from media variants / VMAP documents and a
    wrapper around the REST and GraphQL API endpoints that takes care of the
    bearer token, guest token and session-cookie headers.
    """

    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # Maps each bearer token to the guest token activated with it (None until
    # one has been fetched). It is a class attribute and is mutated in place by
    # _call_api, so fetched guest tokens are shared by every user of this class.
    _TOKENS = {
        'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
        'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
    }
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'

    def _extract_variant_formats(self, variant, video_id):
        """Return ``(formats, subtitles)`` for a single media variant mapping.

        A variant without a ``url`` yields nothing, a URL containing ``.m3u8``
        is expanded through ``_extract_m3u8_formats_and_subtitles`` (non-fatal),
        and anything else becomes one direct HTTP format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}

        if '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)

        # 1000 is passed as the scale to int_or_none (presumably bit/s -> kbit/s);
        # a missing or zero bitrate is normalised to None.
        tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
        fmt = {
            'url': variant_url,
            'format_id': f'http-{tbr}' if tbr else 'http',
            'tbr': tbr,
        }
        self._search_dimensions_in_video_url(fmt, variant_url)
        return [fmt], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download the VMAP XML at ``vmap_url`` and return ``(formats, subtitles)``.

        Every ``videoVariant`` element is processed, followed by the
        ``MediaFile`` URL when it was not already listed among the variants.
        Returns ``([], {})`` when ``vmap_url`` is not a valid URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}

        vmap_data = self._download_xml(vmap_url, video_id)
        formats, subtitles = [], {}
        seen_urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded in the document; decode it in
            # place because the attrib mapping itself is handed on as the variant.
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            seen_urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        # A missing MediaFile would produce no formats anyway; skip it explicitly.
        if video_url and video_url not in seen_urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` from a ``/<W>x<H>/`` path segment, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @functools.cached_property
    def is_logged_in(self):
        """Whether the cookies for the API host include an ``auth_token`` (computed once per instance)."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Call an API endpoint and return the decoded JSON response.

        ``path`` is appended to the GraphQL base URL when ``graphql`` is true,
        otherwise to the REST base URL. ``query`` is an optional mapping of
        query-string parameters.

        Each bearer token in ``_TOKENS`` is tried in order. When not logged in,
        a guest token is activated per bearer token and refreshed once if the
        API reports it as bad. An HTTP 404 on the first bearer token triggers a
        retry with the next one.

        Raises ExtractorError if a guest token cannot be retrieved, if the
        request fails, or if the response contains ``errors``.
        """
        # None sentinel instead of a shared mutable ``{}`` default.
        query = {} if query is None else query

        cookies = self._get_cookies(self._API_BASE)
        headers = {}

        csrf_cookie = cookies.get('ct0')
        if csrf_cookie:
            headers['x-csrf-token'] = csrf_cookie.value

        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        # Loop-invariant request parameters.
        allowed_status = {400, 403, 404} if graphql else {403}
        api_url = (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path

        last_error = None
        for bearer_token in self._TOKENS:
            for first_attempt in (True, False):
                headers['Authorization'] = f'Bearer {bearer_token}'

                if not self.is_logged_in:
                    if not self._TOKENS[bearer_token]:
                        # Do not send a stale guest token while activating a new one
                        headers.pop('x-guest-token', None)
                        guest_token_response = self._download_json(
                            self._API_BASE + 'guest/activate.json', video_id,
                            'Downloading guest token', data=b'', headers=headers)

                        self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
                        if not self._TOKENS[bearer_token]:
                            raise ExtractorError('Could not retrieve guest token')

                    headers['x-guest-token'] = self._TOKENS[bearer_token]

                try:
                    result = self._download_json(
                        api_url, video_id, headers=headers, query=query,
                        expected_status=allowed_status)

                except ExtractorError as e:
                    # A previous bearer token already failed with 404:
                    # surface that original error rather than this one
                    if last_error:
                        raise last_error

                    if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
                        raise

                    last_error = e
                    self.report_warning(
                        'Twitter API gave 404 response, retrying with deprecated auth token. '
                        'Only one media item can be extracted')
                    break  # continue outer loop with next bearer_token

                if result.get('errors'):
                    errors = traverse_obj(result, ('errors', ..., 'message'), expected_type=str)
                    if first_attempt and any('bad guest token' in error.lower() for error in errors):
                        self.to_screen('Guest token has expired. Refreshing guest token')
                        # Clearing the cached token forces re-activation on the second attempt
                        self._TOKENS[bearer_token] = None
                        continue

                    error_message = ', '.join(set(errors)) or 'Unknown error'
                    raise ExtractorError(f'Error(s) while querying API: {error_message}', expected=True)

                return result

    def _build_graphql_query(self, media_id):
        """Build the GraphQL query mapping for ``media_id``; subclasses using GraphQL must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Query a GraphQL ``endpoint`` for ``media_id`` and return the response's ``data`` member.

        Each value of the mapping built by ``_build_graphql_query`` is sent as
        compact JSON in the query string.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
166
167
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card / embedded-video URLs.

    It does not extract anything itself: the numeric id in the URL is turned
    into a ``/statuses/<id>`` URL and handed over to ``TwitterIE``.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Delegate to ``TwitterIE`` using the status URL built from the matched id."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, ie=TwitterIE.ie_key(), video_id=tweet_id)
c8398a9b 292
03879ff0 293
18ca61c5 294class TwitterIE(TwitterBaseIE):
014e8803 295 IE_NAME = 'twitter'
b6795fd3 296 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 297
cf5881fc 298 _TESTS = [{
48aae2d2 299 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 300 'info_dict': {
13b2ae29
SS
301 'id': '643211870443208704',
302 'display_id': '643211948184596480',
f57f84f6 303 'ext': 'mp4',
575036b4 304 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 305 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 306 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
307 'uploader': 'FREE THE NIPPLE',
308 'uploader_id': 'freethenipple',
3b65a6fb 309 'duration': 12.922,
18ca61c5
RA
310 'timestamp': 1442188653,
311 'upload_date': '20150913',
13b2ae29
SS
312 'uploader_url': 'https://twitter.com/freethenipple',
313 'comment_count': int,
314 'repost_count': int,
315 'like_count': int,
316 'tags': [],
317 'age_limit': 18,
f57f84f6 318 },
cf5881fc
YCH
319 }, {
320 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
321 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
322 'info_dict': {
323 'id': '657991469417025536',
324 'ext': 'mp4',
325 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
326 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 327 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
328 'uploader': 'Gifs',
329 'uploader_id': 'giphz',
330 },
7efc1c2b 331 'expected_warnings': ['height', 'width'],
fc0a45fa 332 'skip': 'Account suspended',
b703ebee
JMF
333 }, {
334 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
335 'info_dict': {
336 'id': '665052190608723968',
13b2ae29 337 'display_id': '665052190608723968',
b703ebee 338 'ext': 'mp4',
b6795fd3 339 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 340 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 341 'uploader_id': 'starwars',
7a26ce26 342 'uploader': r're:Star Wars.*',
18ca61c5
RA
343 'timestamp': 1447395772,
344 'upload_date': '20151113',
13b2ae29
SS
345 'uploader_url': 'https://twitter.com/starwars',
346 'comment_count': int,
347 'repost_count': int,
348 'like_count': int,
349 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
350 'age_limit': 0,
b703ebee 351 },
0ae937a7
YCH
352 }, {
353 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
354 'info_dict': {
355 'id': '705235433198714880',
356 'ext': 'mp4',
18ca61c5
RA
357 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
358 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
359 'uploader_id': 'BTNBrentYarina',
360 'uploader': 'Brent Yarina',
18ca61c5
RA
361 'timestamp': 1456976204,
362 'upload_date': '20160303',
13b2ae29
SS
363 'uploader_url': 'https://twitter.com/BTNBrentYarina',
364 'comment_count': int,
365 'repost_count': int,
366 'like_count': int,
367 'tags': [],
368 'age_limit': 0,
0ae937a7
YCH
369 },
370 'params': {
371 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
372 # Test case of TwitterCardIE
373 'skip_download': True,
374 },
352e7d98 375 'skip': 'Dead external link',
03879ff0
YCH
376 }, {
377 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 378 'info_dict': {
13b2ae29
SS
379 'id': '700207414000242688',
380 'display_id': '700207533655363584',
03879ff0 381 'ext': 'mp4',
13b2ae29 382 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 383 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 384 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
385 'uploader': 'jaydin donte geer',
386 'uploader_id': 'jaydingeer',
3b65a6fb 387 'duration': 30.0,
18ca61c5
RA
388 'timestamp': 1455777459,
389 'upload_date': '20160218',
13b2ae29
SS
390 'uploader_url': 'https://twitter.com/jaydingeer',
391 'comment_count': int,
392 'repost_count': int,
393 'like_count': int,
394 'tags': ['Damndaniel'],
395 'age_limit': 0,
03879ff0 396 },
395fd4b0
YCH
397 }, {
398 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
399 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
400 'info_dict': {
401 'id': 'MIOxnrUteUd',
402 'ext': 'mp4',
18ca61c5
RA
403 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
404 'uploader': 'TAKUMA',
405 'uploader_id': '1004126642786242560',
3615bfe1 406 'timestamp': 1402826626,
395fd4b0 407 'upload_date': '20140615',
13b2ae29
SS
408 'thumbnail': r're:^https?://.*\.jpg',
409 'alt_title': 'Vine by TAKUMA',
410 'comment_count': int,
411 'repost_count': int,
412 'like_count': int,
413 'view_count': int,
395fd4b0
YCH
414 },
415 'add_ie': ['Vine'],
36b7d9db
YCH
416 }, {
417 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 418 'info_dict': {
13b2ae29
SS
419 'id': '717462543795523584',
420 'display_id': '719944021058060289',
36b7d9db
YCH
421 'ext': 'mp4',
422 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
423 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
424 'uploader_id': 'CaptainAmerica',
36b7d9db 425 'uploader': 'Captain America',
3b65a6fb 426 'duration': 3.17,
18ca61c5
RA
427 'timestamp': 1460483005,
428 'upload_date': '20160412',
13b2ae29
SS
429 'uploader_url': 'https://twitter.com/CaptainAmerica',
430 'thumbnail': r're:^https?://.*\.jpg',
431 'comment_count': int,
432 'repost_count': int,
433 'like_count': int,
434 'tags': [],
435 'age_limit': 0,
36b7d9db 436 },
f0bc5a86
YCH
437 }, {
438 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
439 'info_dict': {
440 'id': '1zqKVVlkqLaKB',
441 'ext': 'mp4',
18ca61c5 442 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 443 'upload_date': '20160923',
18ca61c5
RA
444 'uploader_id': '1PmKqpJdOJQoY',
445 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 446 'timestamp': 1474613214,
13b2ae29 447 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
448 },
449 'add_ie': ['Periscope'],
2edfd745
YCH
450 }, {
451 # has mp4 formats via mobile API
452 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
453 'info_dict': {
454 'id': '852138619213144067',
455 'ext': 'mp4',
456 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 457 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
458 'uploader': 'عالم الأخبار',
459 'uploader_id': 'news_al3alm',
3b65a6fb 460 'duration': 277.4,
18ca61c5
RA
461 'timestamp': 1492000653,
462 'upload_date': '20170412',
2edfd745 463 },
00dd0cd5 464 'skip': 'Account suspended',
5c1452e8
GF
465 }, {
466 'url': 'https://twitter.com/i/web/status/910031516746514432',
467 'info_dict': {
13b2ae29
SS
468 'id': '910030238373089285',
469 'display_id': '910031516746514432',
5c1452e8
GF
470 'ext': 'mp4',
471 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
472 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 473 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
474 'uploader': 'Préfet de Guadeloupe',
475 'uploader_id': 'Prefet971',
476 'duration': 47.48,
18ca61c5
RA
477 'timestamp': 1505803395,
478 'upload_date': '20170919',
13b2ae29
SS
479 'uploader_url': 'https://twitter.com/Prefet971',
480 'comment_count': int,
481 'repost_count': int,
482 'like_count': int,
483 'tags': ['Maria'],
484 'age_limit': 0,
5c1452e8
GF
485 },
486 'params': {
487 'skip_download': True, # requires ffmpeg
488 },
2593725a
S
489 }, {
490 # card via api.twitter.com/1.1/videos/tweet/config
491 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
492 'info_dict': {
13b2ae29
SS
493 'id': '1001551417340022785',
494 'display_id': '1001551623938805763',
2593725a
S
495 'ext': 'mp4',
496 'title': 're:.*?Shep is on a roll today.*?',
497 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 498 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
499 'uploader': 'Lis Power',
500 'uploader_id': 'LisPower1',
501 'duration': 111.278,
18ca61c5
RA
502 'timestamp': 1527623489,
503 'upload_date': '20180529',
13b2ae29
SS
504 'uploader_url': 'https://twitter.com/LisPower1',
505 'comment_count': int,
506 'repost_count': int,
507 'like_count': int,
508 'tags': [],
509 'age_limit': 0,
2593725a
S
510 },
511 'params': {
512 'skip_download': True, # requires ffmpeg
513 },
b7ef93f0
S
514 }, {
515 'url': 'https://twitter.com/foobar/status/1087791357756956680',
516 'info_dict': {
13b2ae29
SS
517 'id': '1087791272830607360',
518 'display_id': '1087791357756956680',
b7ef93f0
S
519 'ext': 'mp4',
520 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
521 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 522 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
b7ef93f0
S
523 'uploader': 'Twitter',
524 'uploader_id': 'Twitter',
525 'duration': 61.567,
18ca61c5
RA
526 'timestamp': 1548184644,
527 'upload_date': '20190122',
13b2ae29
SS
528 'uploader_url': 'https://twitter.com/Twitter',
529 'comment_count': int,
530 'repost_count': int,
531 'like_count': int,
532 'tags': [],
533 'age_limit': 0,
18ca61c5
RA
534 },
535 }, {
536 # not available in Periscope
537 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
538 'info_dict': {
539 'id': '1vOGwqejwoWxB',
540 'ext': 'mp4',
541 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
542 'uploader': 'Vivi',
543 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
544 'thumbnail': r're:^https?://.*\.jpg',
545 'tags': ['EduTECH2019'],
546 'view_count': int,
b7ef93f0 547 },
18ca61c5 548 'add_ie': ['TwitterBroadcast'],
30a074c2 549 }, {
550 # unified card
551 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
552 'info_dict': {
13b2ae29
SS
553 'id': '1349774757969989634',
554 'display_id': '1349794411333394432',
30a074c2 555 'ext': 'mp4',
556 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
557 'thumbnail': r're:^https?://.*\.jpg',
558 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
559 'uploader': 'Brooklyn Nets',
560 'uploader_id': 'BrooklynNets',
561 'duration': 324.484,
562 'timestamp': 1610651040,
563 'upload_date': '20210114',
13b2ae29
SS
564 'uploader_url': 'https://twitter.com/BrooklynNets',
565 'comment_count': int,
566 'repost_count': int,
567 'like_count': int,
568 'tags': [],
569 'age_limit': 0,
30a074c2 570 },
571 'params': {
572 'skip_download': True,
573 },
13b2ae29
SS
574 }, {
575 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
576 'info_dict': {
577 'id': '1577855447914409984',
578 'display_id': '1577855540407197696',
579 'ext': 'mp4',
352e7d98 580 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
581 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 582 'upload_date': '20221006',
352e7d98 583 'uploader': 'oshtru',
13b2ae29
SS
584 'uploader_id': 'oshtru',
585 'uploader_url': 'https://twitter.com/oshtru',
586 'thumbnail': r're:^https?://.*\.jpg',
587 'duration': 30.03,
7a26ce26 588 'timestamp': 1665025050,
13b2ae29
SS
589 'comment_count': int,
590 'repost_count': int,
591 'like_count': int,
592 'tags': [],
593 'age_limit': 0,
594 },
595 'params': {'skip_download': True},
596 }, {
597 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
598 'info_dict': {
599 'id': '1577719286659006464',
600 'title': 'Ultima | #\u0432\u029f\u043c - Test',
601 'description': 'Test https://t.co/Y3KEZD7Dad',
602 'uploader': 'Ultima | #\u0432\u029f\u043c',
603 'uploader_id': 'UltimaShadowX',
604 'uploader_url': 'https://twitter.com/UltimaShadowX',
605 'upload_date': '20221005',
7a26ce26 606 'timestamp': 1664992565,
13b2ae29
SS
607 'comment_count': int,
608 'repost_count': int,
609 'like_count': int,
610 'tags': [],
611 'age_limit': 0,
612 },
613 'playlist_count': 4,
614 'params': {'skip_download': True},
7a26ce26
SS
615 }, {
616 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
617 'info_dict': {
618 'id': '1575559336759263233',
619 'display_id': '1575560063510810624',
620 'ext': 'mp4',
621 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
622 'thumbnail': r're:^https?://.*\.jpg',
623 'description': 'md5:95aea692fda36a12081b9629b02daa92',
624 'uploader': 'Max Olson',
625 'uploader_id': 'MesoMax919',
626 'uploader_url': 'https://twitter.com/MesoMax919',
627 'duration': 21.321,
628 'timestamp': 1664477766,
629 'upload_date': '20220929',
630 'comment_count': int,
631 'repost_count': int,
632 'like_count': int,
633 'tags': ['HurricaneIan'],
634 'age_limit': 0,
635 },
636 }, {
637 # Adult content, uses old token
638 # Fails if not logged in (GraphQL)
639 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
640 'info_dict': {
641 'id': '1575199163847000068',
642 'display_id': '1575199173472927762',
643 'ext': 'mp4',
644 'title': str,
645 'description': str,
646 'uploader': str,
647 'uploader_id': 'Rizdraws',
648 'uploader_url': 'https://twitter.com/Rizdraws',
649 'upload_date': '20220928',
650 'timestamp': 1664391723,
16bed382 651 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
652 'like_count': int,
653 'repost_count': int,
654 'comment_count': int,
655 'age_limit': 18,
656 'tags': []
657 },
658 'expected_warnings': ['404'],
659 }, {
660 # Description is missing one https://t.co url (GraphQL)
661 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
662 'playlist_mincount': 2,
663 'info_dict': {
664 'id': '1395079556562706435',
665 'title': str,
666 'tags': [],
667 'uploader': str,
668 'like_count': int,
669 'upload_date': '20210519',
670 'age_limit': 0,
671 'repost_count': int,
672 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
673 'uploader_id': 'Srirachachau',
674 'comment_count': int,
675 'uploader_url': 'https://twitter.com/Srirachachau',
676 'timestamp': 1621447860,
677 },
678 }, {
679 # Description is missing one https://t.co url (GraphQL)
680 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
681 'playlist_mincount': 2,
682 'info_dict': {
683 'id': '1578353380363501568',
684 'title': str,
685 'uploader_id': 'DavidToons_',
686 'repost_count': int,
687 'like_count': int,
688 'uploader': str,
689 'timestamp': 1665143744,
690 'uploader_url': 'https://twitter.com/DavidToons_',
691 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
692 'tags': [],
693 'comment_count': int,
694 'upload_date': '20221007',
695 'age_limit': 0,
696 },
697 }, {
698 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
699 'playlist_count': 2,
700 'info_dict': {
701 'id': '1578401165338976258',
702 'title': str,
703 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
704 'uploader': str,
705 'uploader_id': 'primevideouk',
706 'timestamp': 1665155137,
707 'upload_date': '20221007',
708 'age_limit': 0,
709 'uploader_url': 'https://twitter.com/primevideouk',
710 'comment_count': int,
711 'repost_count': int,
712 'like_count': int,
713 'tags': ['TheRingsOfPower'],
714 },
715 }, {
716 # Twitter Spaces
717 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
718 'info_dict': {
719 'id': '1lPJqmBeeNAJb',
720 'ext': 'm4a',
721 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
722 'uploader': r're:Monique Camarra.+?',
723 'uploader_id': 'MoniqueCamarra',
724 'live_status': 'was_live',
725 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
726 'timestamp': 1658407771464,
727 },
728 'add_ie': ['TwitterSpaces'],
729 'params': {'skip_download': 'm3u8'},
16bed382 730 }, {
731 # URL specifies video number but --yes-playlist
732 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
733 'playlist_mincount': 2,
734 'info_dict': {
735 'id': '1600649710662213632',
736 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
737 'timestamp': 1670459604.0,
738 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
739 'comment_count': int,
740 'uploader_id': 'CTVJLaidlaw',
741 'repost_count': int,
742 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
743 'upload_date': '20221208',
744 'age_limit': 0,
745 'uploader': 'Jocelyn Laidlaw',
746 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
747 'like_count': int,
748 },
749 }, {
750 # URL specifies video number and --no-playlist
751 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
752 'info_dict': {
753 'id': '1600649511827013632',
754 'ext': 'mp4',
b6795fd3 755 'title': 'md5:dac4f4d4c591fcc4e88a253eba472dc3',
16bed382 756 'thumbnail': r're:^https?://.+\.jpg',
757 'timestamp': 1670459604.0,
758 'uploader_id': 'CTVJLaidlaw',
759 'uploader': 'Jocelyn Laidlaw',
760 'repost_count': int,
761 'comment_count': int,
762 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
763 'duration': 102.226,
764 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
765 'display_id': '1600649710662213632',
766 'like_count': int,
767 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
768 'upload_date': '20221208',
769 'age_limit': 0,
770 },
771 'params': {'noplaylist': True},
7543c9c9 772 }, {
773 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
774 # note the id different between extraction and url
775 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
776 'info_dict': {
777 'id': '1621117577354424321',
778 'display_id': '1621117700482416640',
779 'ext': 'mp4',
780 'title': '뽀 - 아 최우제 이동속도 봐',
781 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
782 'duration': 24.598,
783 'uploader': '뽀',
784 'uploader_id': 's2FAKER',
785 'uploader_url': 'https://twitter.com/s2FAKER',
786 'upload_date': '20230202',
787 'timestamp': 1675339553.0,
788 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
789 'age_limit': 18,
790 'tags': [],
791 'like_count': int,
792 'repost_count': int,
793 'comment_count': int,
794 },
b6795fd3
SS
795 }, {
796 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
797 'info_dict': {
798 'id': '1599108643743473680',
799 'display_id': '1599108751385972737',
800 'ext': 'mp4',
801 'title': '\u06ea - \U0001F48B',
802 'uploader_url': 'https://twitter.com/hlo_again',
803 'like_count': int,
804 'uploader_id': 'hlo_again',
805 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
806 'repost_count': int,
807 'duration': 9.531,
808 'comment_count': int,
809 'upload_date': '20221203',
810 'age_limit': 0,
811 'timestamp': 1670092210.0,
812 'tags': [],
813 'uploader': '\u06ea',
814 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
815 },
816 'params': {'noplaylist': True},
817 }, {
818 # Media view count is GraphQL only, force in test
819 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
820 'info_dict': {
821 'id': '1600009362759733248',
822 'display_id': '1600009574919962625',
823 'ext': 'mp4',
824 'uploader_url': 'https://twitter.com/MunTheShinobi',
825 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
826 'view_count': int,
827 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
828 'age_limit': 0,
829 'uploader': 'Mün The Shinobi | BlaqBoi\'s Therapist',
830 'repost_count': int,
831 'upload_date': '20221206',
832 'title': 'Mün The Shinobi | BlaqBoi\'s Therapist - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
833 'comment_count': int,
834 'like_count': int,
835 'tags': [],
836 'uploader_id': 'MunTheShinobi',
837 'duration': 139.987,
838 'timestamp': 1670306984.0,
839 },
840 'params': {'extractor_args': {'twitter': {'force_graphql': ['']}}},
cf605226 841 }, {
842 # url to retweet id
843 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
844 'info_dict': {
845 'id': '1623274794488659969',
846 'display_id': '1623739803874349067',
847 'ext': 'mp4',
848 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
849 'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
850 'uploader': 'Johnny Bullets',
851 'uploader_id': 'Johnnybull3ts',
852 'uploader_url': 'https://twitter.com/Johnnybull3ts',
853 'age_limit': 0,
854 'tags': [],
855 'duration': 8.033,
856 'timestamp': 1675853859.0,
857 'upload_date': '20230208',
858 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
859 'like_count': int,
860 'repost_count': int,
861 'comment_count': int,
862 },
82fb2357 863 }, {
864 # onion route
865 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
866 'only_matching': True,
18ca61c5
RA
867 }, {
868 # Twitch Clip Embed
869 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
870 'only_matching': True,
10a5091e
RA
871 }, {
872 # promo_video_website card
873 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
874 'only_matching': True,
00dd0cd5 875 }, {
876 # promo_video_convo card
877 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
878 'only_matching': True,
879 }, {
880 # appplayer card
881 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
882 'only_matching': True,
30a074c2 883 }, {
884 # video_direct_message card
885 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
886 'only_matching': True,
887 }, {
888 # poll2choice_video card
889 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
890 'only_matching': True,
891 }, {
892 # poll3choice_video card
893 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
894 'only_matching': True,
895 }, {
896 # poll4choice_video card
897 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
898 'only_matching': True,
cf5881fc 899 }]
f57f84f6 900
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL TweetDetail response into a legacy-style status dict.

    Looks up the conversation entry whose ``entryId`` is ``tweet-<twid>``,
    takes its ``legacy`` dict and grafts the user, card and quoted-status
    ``legacy`` dicts onto it under the keys ``user``, ``card`` and
    ``quoted_status``.

    Raises ExtractorError (expected) when the located result carries a
    ``tombstone`` entry.
    """
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    )
    result = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    # Anything other than a plain 'Tweet' (or a missing typename) is only warned about
    if result.get('__typename') not in ('Tweet', None):
        self.report_warning(f'Unknown typename: {result.get("__typename")}', twid, only_once=True)

    if 'tombstone' in result:
        cause = traverse_obj(result, ('tombstone', 'text', 'text'), expected_type=str)
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    status = result.get('legacy', {})
    nested_legacy = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_legacy)

    # extra transformation is needed since result does not match legacy format:
    # the list of {'key': ..., 'value': ...} items is folded into a key -> value mapping
    binding_values = {}
    for item in traverse_obj(status, ('card', 'binding_values', ...), expected_type=dict):
        binding_values[item.get('key')] = item.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
931
932 def _build_graphql_query(self, media_id):
933 return {
934 'variables': {
935 'focalTweetId': media_id,
936 'includePromotedContent': True,
937 'with_rux_injections': False,
938 'withBirdwatchNotes': True,
939 'withCommunity': True,
940 'withDownvotePerspective': False,
941 'withQuickPromoteEligibilityTweetFields': True,
942 'withReactionsMetadata': False,
943 'withReactionsPerspective': False,
944 'withSuperFollowsTweetFields': True,
945 'withSuperFollowsUserFields': True,
946 'withV2Timeline': True,
947 'withVoice': True,
948 },
949 'features': {
950 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
951 'interactive_text_enabled': True,
952 'responsive_web_edit_tweet_api_enabled': True,
953 'responsive_web_enhance_cards_enabled': True,
954 'responsive_web_graphql_timeline_navigation_enabled': False,
955 'responsive_web_text_conversations_enabled': False,
956 'responsive_web_uc_gql_enabled': True,
957 'standardized_nudges_misinfo': True,
958 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
959 'tweetypie_unmention_optimization_enabled': True,
960 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
961 'verified_phone_label_enabled': False,
962 'vibe_api_enabled': True,
963 },
964 }
965
def _real_extract(self, url):
    """Extract the video(s) attached to a tweet.

    The tweet status is fetched through the GraphQL API when logged in or
    when the ``force_graphql`` extractor argument is set, and through the
    ``statuses/show`` API otherwise. Media entries and card data are then
    turned into info dicts.

    Returns a single info dict, a playlist result when several entries are
    found, or a URL result pointing at the tweet's first expanded URL when
    no media was found.

    Raises ExtractorError (expected) when a URL-selected media index is
    unavailable or not a video, or when nothing extractable is found.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self.is_logged_in or self._configuration_arg('force_graphql'):
        self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    else:
        # 'retweeted_status' is tried first and the whole response (None path) second
        status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }), 'retweeted_status', None)

    title = description = status['full_text'].replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the per-media part of an info dict from one media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                # The size variant is selected through the 'name' query parameter
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            # duration_millis is divided by 1000 to get seconds
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield info dicts (or URL results) described by the tweet's card, if any."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value', keyed by its lowercased 'type'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        # Only the part after the last ':' of the card name identifies the card kind
        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            # The 'unified_card' binding is a JSON string; its media entities
            # are processed like regular tweet media
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                # Skip missing images and placeholder images
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself (None path) and of its quoted status
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # selected_index is 1-based in the URL; only the tweet's own media is indexed here
        desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        # Per-media fields (including 'id') override the shared tweet metadata
        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media found: fall back to the first expanded URL in the tweet,
        # unless there is none or it is the URL being extracted
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            raise ExtractorError('No video could be found in this tweet', expected=True)

        return self.url_result(expanded_url, display_id=twid, **info)

    # The first entry also records the archive id built from the tweet id
    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    # Several entries: number the titles and return them as a playlist
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1158
1159
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for videos served from ``amp.twimg.com/v/<id>`` pages."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page at *url* and build an info dict from its meta tags.

        Formats come from the VMAP URL in the ``twitter:amplify:vmap`` meta
        tag; the thumbnail and the width/height values come from the
        ``twitter:image:*`` and ``twitter:player:*`` meta tags.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Returns (width, height) read from the 'twitter:<target>:*' meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Guard against an empty format list: indexing it unconditionally
        # would raise IndexError instead of returning an info dict with
        # no formats
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1215
1216
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` URLs."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Fetch broadcast metadata and its stream source, and return the info dict."""
        broadcast_id = self._match_id(url)

        # Broadcast metadata is keyed by broadcast id in the API response
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The 'type' query parameter of the playlist URL (if any) is used as the m3u8 id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1253
1254
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for ``/i/spaces/<id>`` URLs."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            # seconds since the epoch (the API value 1659877956397 is in milliseconds)
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lowercased metadata 'state' value to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for the query on *space_id*."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Extract a Space: metadata via GraphQL, formats via the stream-status API.

        Raises ExtractorError (expected) when the API returns no space data.
        Spaces that have not started, or have ended but are not downloadable
        yet, are reported through ``raise_no_formats``.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # try_call wraps the lookup so a missing or unknown state does not raise here
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # 'created_at' is an epoch value in milliseconds; scale it down to
            # seconds so it matches the other timestamps in this file
            'timestamp': int_or_none(metadata.get('created_at'), 1000),
            'formats': formats,
        }
1347
1348
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolver for ``t.co`` short links and the internal ``tco:<id>`` form."""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL back for extraction.

        When the final URL is Twitter's unsafe-link warning page, the
        warning prefix is removed and the remainder is used as the target.
        """
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # 'tco:<id>' form: rebuild the real t.co URL from the id
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the 'curl' User-Agent is presumably what makes t.co
        # answer with a plain redirect - confirm before changing it
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading prefix; later occurrences belong to the target URL
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)