]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[extractor/twitter] Default to GraphQL, handle auth errors (#6957)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
23e7cba8
S
2import re
3
4from .common import InfoExtractor
13b2ae29 5from .periscope import PeriscopeBaseIE, PeriscopeIE
7a26ce26 6from ..compat import functools # isort: split
18ca61c5 7from ..compat import (
18ca61c5
RA
8 compat_parse_qs,
9 compat_urllib_parse_unquote,
10 compat_urllib_parse_urlparse,
11)
23e7cba8 12from ..utils import (
2edfd745 13 ExtractorError,
13b2ae29 14 dict_get,
23e7cba8 15 float_or_none,
13b2ae29 16 format_field,
cf5881fc 17 int_or_none,
13b2ae29 18 make_archive_id,
147e62fc 19 remove_end,
13b2ae29
SS
20 str_or_none,
21 strip_or_none,
f1150b9e 22 traverse_obj,
7a26ce26 23 try_call,
2edfd745 24 try_get,
18ca61c5
RA
25 unified_timestamp,
26 update_url_query,
41d1cca3 27 url_or_none,
2edfd745 28 xpath_text,
23e7cba8
S
29)
30
31
class TwitterBaseIE(InfoExtractor):
    """Shared helpers for the Twitter extractors.

    Provides format extraction from media variants and VMAP documents, and a
    wrapper around the legacy (1.1) and GraphQL APIs that handles the auth
    headers and guest-token acquisition.
    """

    # Base URL of the legacy REST API; also used as the cookie lookup URL.
    _API_BASE = 'https://api.twitter.com/1.1/'
    # Base URL that GraphQL endpoint paths are appended to.
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # URL prefix shared by the extractors' _VALID_URL patterns
    # (www/m/mobile subdomains on twitter.com, or the onion mirror).
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Authorization header sent with every API request.
    _AUTH = {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
    # Guest token cached by _call_api; None until one has been fetched.
    _guest_token = None

    def _extract_variant_formats(self, variant, video_id):
        """Build formats for a single media variant.

        ``variant`` is a mapping that may carry 'url' and 'bitrate'/'bit_rate'.
        Returns a ``(formats, subtitles)`` tuple; both are empty when the
        variant has no URL.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # NOTE(review): the 1000 is presumably a scale turning bit/s into
            # kbit/s - confirm against int_or_none. `or None` drops a zero bitrate.
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and extract formats from it.

        Returns a ``(formats, subtitles)`` tuple; both are empty when
        ``vmap_url`` is not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            quoted_url = video_variant.attrib.get('url')
            if not quoted_url:
                # A variant without a URL cannot yield a format; skip it
                # instead of failing the whole extraction with a KeyError.
                continue
            # The attribute is stored percent-encoded; decode it in place so the
            # attrib mapping can be handed to _extract_variant_formats as-is.
            video_variant.attrib['url'] = compat_urllib_parse_unquote(quoted_url)
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # The MediaFile element is used as well unless a variant already
        # covered the same URL. A missing element gives a None URL, for which
        # _extract_variant_formats returns nothing.
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on ``a_format`` from a ``/<W>x<H>/`` URL segment.

        ``a_format`` is updated in place and left untouched when the URL has
        no such segment.
        """
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @functools.cached_property
    def is_logged_in(self):
        """Whether the cookies for the API host include an 'auth_token'."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Query the legacy or GraphQL API and return the decoded JSON.

        ``path`` is appended to ``_GRAPHQL_API_BASE`` when ``graphql`` is true
        and to ``_API_BASE`` otherwise. ``query`` is an optional mapping of
        URL query parameters.

        Raises ExtractorError when no guest token can be obtained for an
        anonymous session, or when the response carries an 'errors' entry
        that is not resolved by refreshing the guest token.
        """
        # None instead of a shared mutable `{}` default; callers that pass
        # their own mapping are unaffected.
        if query is None:
            query = {}

        cookies = self._get_cookies(self._API_BASE)
        headers = self._AUTH.copy()

        csrf_cookie = cookies.get('ct0')
        if csrf_cookie:
            headers['x-csrf-token'] = csrf_cookie.value

        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        # At most two attempts: the second one only runs after a rejected
        # guest token has been discarded.
        for first_attempt in (True, False):
            if not self.is_logged_in and not self._guest_token:
                # Drop any stale token header before asking for a new token
                headers.pop('x-guest-token', None)
                self._guest_token = traverse_obj(self._download_json(
                    f'{self._API_BASE}guest/activate.json', video_id,
                    'Downloading guest token', data=b'', headers=headers), 'guest_token')
            # `headers` is rebuilt on every call, so a previously cached token
            # has to be attached here too, not only a freshly fetched one.
            if self._guest_token:
                headers['x-guest-token'] = self._guest_token
            elif not self.is_logged_in:
                raise ExtractorError('Could not retrieve guest token')

            # These statuses are passed as expected_status so that the JSON
            # body of such responses is still returned and inspected below.
            allowed_status = {400, 401, 403, 404} if graphql else {403}
            result = self._download_json(
                (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
                video_id, headers=headers, query=query, expected_status=allowed_status,
                note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON')

            if result.get('errors'):
                errors = ', '.join(set(traverse_obj(result, ('errors', ..., 'message', {str}))))
                if not self.is_logged_in and first_attempt and 'bad guest token' in errors.lower():
                    self.to_screen('Guest token has expired. Refreshing guest token')
                    self._guest_token = None
                    continue

                raise ExtractorError(
                    f'Error(s) while querying API: {errors or "Unknown error"}', expected=True)

            return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for ``media_id``.

        Subclasses that use _call_graphql_api must override this.
        """
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL ``endpoint`` and return the 'data' part of the response."""
        data = self._build_graphql_query(media_id)
        # Each top-level value is sent as a compact JSON string
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
144
145
class TwitterCardIE(InfoExtractor):
    """Extractor for Twitter card and ``/i/videos`` URLs.

    It does no extraction of its own: the numeric ID is taken from the URL and
    the matching status URL is delegated to TwitterIE.
    """

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the card URL over to TwitterIE as the matching status URL."""
        tweet_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 270
03879ff0 271
18ca61c5 272class TwitterIE(TwitterBaseIE):
014e8803 273 IE_NAME = 'twitter'
b6795fd3 274 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
f57f84f6 275
cf5881fc 276 _TESTS = [{
48aae2d2 277 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 278 'info_dict': {
13b2ae29
SS
279 'id': '643211870443208704',
280 'display_id': '643211948184596480',
f57f84f6 281 'ext': 'mp4',
575036b4 282 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 283 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 284 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
285 'uploader': 'FREE THE NIPPLE',
286 'uploader_id': 'freethenipple',
3b65a6fb 287 'duration': 12.922,
18ca61c5
RA
288 'timestamp': 1442188653,
289 'upload_date': '20150913',
13b2ae29
SS
290 'uploader_url': 'https://twitter.com/freethenipple',
291 'comment_count': int,
292 'repost_count': int,
293 'like_count': int,
147e62fc 294 'view_count': int,
13b2ae29
SS
295 'tags': [],
296 'age_limit': 18,
f57f84f6 297 },
cf5881fc
YCH
298 }, {
299 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
300 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
301 'info_dict': {
302 'id': '657991469417025536',
303 'ext': 'mp4',
304 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
305 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 306 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
307 'uploader': 'Gifs',
308 'uploader_id': 'giphz',
309 },
7efc1c2b 310 'expected_warnings': ['height', 'width'],
fc0a45fa 311 'skip': 'Account suspended',
b703ebee
JMF
312 }, {
313 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
314 'info_dict': {
315 'id': '665052190608723968',
13b2ae29 316 'display_id': '665052190608723968',
b703ebee 317 'ext': 'mp4',
b6795fd3 318 'title': r're:Star Wars.*A new beginning is coming December 18.*',
18ca61c5 319 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 320 'uploader_id': 'starwars',
7a26ce26 321 'uploader': r're:Star Wars.*',
18ca61c5
RA
322 'timestamp': 1447395772,
323 'upload_date': '20151113',
13b2ae29
SS
324 'uploader_url': 'https://twitter.com/starwars',
325 'comment_count': int,
326 'repost_count': int,
327 'like_count': int,
328 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
329 'age_limit': 0,
b703ebee 330 },
0ae937a7
YCH
331 }, {
332 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
333 'info_dict': {
334 'id': '705235433198714880',
335 'ext': 'mp4',
18ca61c5
RA
336 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
337 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
338 'uploader_id': 'BTNBrentYarina',
339 'uploader': 'Brent Yarina',
18ca61c5
RA
340 'timestamp': 1456976204,
341 'upload_date': '20160303',
13b2ae29
SS
342 'uploader_url': 'https://twitter.com/BTNBrentYarina',
343 'comment_count': int,
344 'repost_count': int,
345 'like_count': int,
346 'tags': [],
347 'age_limit': 0,
0ae937a7
YCH
348 },
349 'params': {
350 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
351 # Test case of TwitterCardIE
352 'skip_download': True,
353 },
352e7d98 354 'skip': 'Dead external link',
03879ff0
YCH
355 }, {
356 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 357 'info_dict': {
13b2ae29
SS
358 'id': '700207414000242688',
359 'display_id': '700207533655363584',
03879ff0 360 'ext': 'mp4',
13b2ae29 361 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 362 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 363 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
364 'uploader': 'jaydin donte geer',
365 'uploader_id': 'jaydingeer',
3b65a6fb 366 'duration': 30.0,
18ca61c5
RA
367 'timestamp': 1455777459,
368 'upload_date': '20160218',
13b2ae29
SS
369 'uploader_url': 'https://twitter.com/jaydingeer',
370 'comment_count': int,
371 'repost_count': int,
372 'like_count': int,
147e62fc 373 'view_count': int,
13b2ae29
SS
374 'tags': ['Damndaniel'],
375 'age_limit': 0,
03879ff0 376 },
395fd4b0
YCH
377 }, {
378 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
379 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
380 'info_dict': {
381 'id': 'MIOxnrUteUd',
382 'ext': 'mp4',
18ca61c5
RA
383 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
384 'uploader': 'TAKUMA',
385 'uploader_id': '1004126642786242560',
3615bfe1 386 'timestamp': 1402826626,
395fd4b0 387 'upload_date': '20140615',
13b2ae29
SS
388 'thumbnail': r're:^https?://.*\.jpg',
389 'alt_title': 'Vine by TAKUMA',
390 'comment_count': int,
391 'repost_count': int,
392 'like_count': int,
393 'view_count': int,
395fd4b0
YCH
394 },
395 'add_ie': ['Vine'],
36b7d9db
YCH
396 }, {
397 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 398 'info_dict': {
13b2ae29
SS
399 'id': '717462543795523584',
400 'display_id': '719944021058060289',
36b7d9db
YCH
401 'ext': 'mp4',
402 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
403 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
404 'uploader_id': 'CaptainAmerica',
36b7d9db 405 'uploader': 'Captain America',
3b65a6fb 406 'duration': 3.17,
18ca61c5
RA
407 'timestamp': 1460483005,
408 'upload_date': '20160412',
13b2ae29
SS
409 'uploader_url': 'https://twitter.com/CaptainAmerica',
410 'thumbnail': r're:^https?://.*\.jpg',
411 'comment_count': int,
412 'repost_count': int,
413 'like_count': int,
147e62fc 414 'view_count': int,
13b2ae29
SS
415 'tags': [],
416 'age_limit': 0,
36b7d9db 417 },
f0bc5a86
YCH
418 }, {
419 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
420 'info_dict': {
421 'id': '1zqKVVlkqLaKB',
422 'ext': 'mp4',
18ca61c5 423 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 424 'upload_date': '20160923',
18ca61c5
RA
425 'uploader_id': '1PmKqpJdOJQoY',
426 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 427 'timestamp': 1474613214,
13b2ae29 428 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
429 },
430 'add_ie': ['Periscope'],
2edfd745
YCH
431 }, {
432 # has mp4 formats via mobile API
433 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
434 'info_dict': {
435 'id': '852138619213144067',
436 'ext': 'mp4',
437 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 438 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
439 'uploader': 'عالم الأخبار',
440 'uploader_id': 'news_al3alm',
3b65a6fb 441 'duration': 277.4,
18ca61c5
RA
442 'timestamp': 1492000653,
443 'upload_date': '20170412',
2edfd745 444 },
00dd0cd5 445 'skip': 'Account suspended',
5c1452e8
GF
446 }, {
447 'url': 'https://twitter.com/i/web/status/910031516746514432',
448 'info_dict': {
13b2ae29
SS
449 'id': '910030238373089285',
450 'display_id': '910031516746514432',
5c1452e8
GF
451 'ext': 'mp4',
452 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
453 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 454 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
455 'uploader': 'Préfet de Guadeloupe',
456 'uploader_id': 'Prefet971',
457 'duration': 47.48,
18ca61c5
RA
458 'timestamp': 1505803395,
459 'upload_date': '20170919',
13b2ae29
SS
460 'uploader_url': 'https://twitter.com/Prefet971',
461 'comment_count': int,
462 'repost_count': int,
463 'like_count': int,
147e62fc 464 'view_count': int,
13b2ae29
SS
465 'tags': ['Maria'],
466 'age_limit': 0,
5c1452e8
GF
467 },
468 'params': {
469 'skip_download': True, # requires ffmpeg
470 },
2593725a
S
471 }, {
472 # card via api.twitter.com/1.1/videos/tweet/config
473 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
474 'info_dict': {
13b2ae29
SS
475 'id': '1001551417340022785',
476 'display_id': '1001551623938805763',
2593725a
S
477 'ext': 'mp4',
478 'title': 're:.*?Shep is on a roll today.*?',
479 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 480 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
481 'uploader': 'Lis Power',
482 'uploader_id': 'LisPower1',
483 'duration': 111.278,
18ca61c5
RA
484 'timestamp': 1527623489,
485 'upload_date': '20180529',
13b2ae29
SS
486 'uploader_url': 'https://twitter.com/LisPower1',
487 'comment_count': int,
488 'repost_count': int,
489 'like_count': int,
147e62fc 490 'view_count': int,
13b2ae29
SS
491 'tags': [],
492 'age_limit': 0,
2593725a
S
493 },
494 'params': {
495 'skip_download': True, # requires ffmpeg
496 },
b7ef93f0
S
497 }, {
498 'url': 'https://twitter.com/foobar/status/1087791357756956680',
499 'info_dict': {
13b2ae29
SS
500 'id': '1087791272830607360',
501 'display_id': '1087791357756956680',
b7ef93f0
S
502 'ext': 'mp4',
503 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
504 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 505 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
b7ef93f0
S
506 'uploader': 'Twitter',
507 'uploader_id': 'Twitter',
508 'duration': 61.567,
18ca61c5
RA
509 'timestamp': 1548184644,
510 'upload_date': '20190122',
13b2ae29
SS
511 'uploader_url': 'https://twitter.com/Twitter',
512 'comment_count': int,
513 'repost_count': int,
514 'like_count': int,
147e62fc 515 'view_count': int,
13b2ae29
SS
516 'tags': [],
517 'age_limit': 0,
18ca61c5
RA
518 },
519 }, {
520 # not available in Periscope
521 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
522 'info_dict': {
523 'id': '1vOGwqejwoWxB',
524 'ext': 'mp4',
525 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
526 'uploader': 'Vivi',
527 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
528 'thumbnail': r're:^https?://.*\.jpg',
529 'tags': ['EduTECH2019'],
530 'view_count': int,
b7ef93f0 531 },
18ca61c5 532 'add_ie': ['TwitterBroadcast'],
30a074c2 533 }, {
534 # unified card
535 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
536 'info_dict': {
13b2ae29
SS
537 'id': '1349774757969989634',
538 'display_id': '1349794411333394432',
30a074c2 539 'ext': 'mp4',
540 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
541 'thumbnail': r're:^https?://.*\.jpg',
542 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
543 'uploader': 'Brooklyn Nets',
544 'uploader_id': 'BrooklynNets',
545 'duration': 324.484,
546 'timestamp': 1610651040,
547 'upload_date': '20210114',
13b2ae29
SS
548 'uploader_url': 'https://twitter.com/BrooklynNets',
549 'comment_count': int,
550 'repost_count': int,
551 'like_count': int,
552 'tags': [],
553 'age_limit': 0,
30a074c2 554 },
555 'params': {
556 'skip_download': True,
557 },
13b2ae29
SS
558 }, {
559 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
560 'info_dict': {
561 'id': '1577855447914409984',
562 'display_id': '1577855540407197696',
563 'ext': 'mp4',
352e7d98 564 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
565 'description': 'md5:b9c3699335447391d11753ab21c70a74',
13b2ae29 566 'upload_date': '20221006',
352e7d98 567 'uploader': 'oshtru',
13b2ae29
SS
568 'uploader_id': 'oshtru',
569 'uploader_url': 'https://twitter.com/oshtru',
570 'thumbnail': r're:^https?://.*\.jpg',
571 'duration': 30.03,
7a26ce26 572 'timestamp': 1665025050,
13b2ae29
SS
573 'comment_count': int,
574 'repost_count': int,
575 'like_count': int,
147e62fc 576 'view_count': int,
13b2ae29
SS
577 'tags': [],
578 'age_limit': 0,
579 },
580 'params': {'skip_download': True},
581 }, {
582 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
583 'info_dict': {
584 'id': '1577719286659006464',
585 'title': 'Ultima | #\u0432\u029f\u043c - Test',
586 'description': 'Test https://t.co/Y3KEZD7Dad',
587 'uploader': 'Ultima | #\u0432\u029f\u043c',
588 'uploader_id': 'UltimaShadowX',
589 'uploader_url': 'https://twitter.com/UltimaShadowX',
590 'upload_date': '20221005',
7a26ce26 591 'timestamp': 1664992565,
13b2ae29
SS
592 'comment_count': int,
593 'repost_count': int,
594 'like_count': int,
595 'tags': [],
596 'age_limit': 0,
597 },
598 'playlist_count': 4,
599 'params': {'skip_download': True},
7a26ce26
SS
600 }, {
601 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
602 'info_dict': {
603 'id': '1575559336759263233',
604 'display_id': '1575560063510810624',
605 'ext': 'mp4',
606 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
607 'thumbnail': r're:^https?://.*\.jpg',
608 'description': 'md5:95aea692fda36a12081b9629b02daa92',
609 'uploader': 'Max Olson',
610 'uploader_id': 'MesoMax919',
611 'uploader_url': 'https://twitter.com/MesoMax919',
612 'duration': 21.321,
613 'timestamp': 1664477766,
614 'upload_date': '20220929',
615 'comment_count': int,
616 'repost_count': int,
617 'like_count': int,
147e62fc 618 'view_count': int,
7a26ce26
SS
619 'tags': ['HurricaneIan'],
620 'age_limit': 0,
621 },
622 }, {
147e62fc 623 # Adult content, fails if not logged in (GraphQL)
7a26ce26
SS
624 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
625 'info_dict': {
626 'id': '1575199163847000068',
627 'display_id': '1575199173472927762',
628 'ext': 'mp4',
629 'title': str,
630 'description': str,
631 'uploader': str,
632 'uploader_id': 'Rizdraws',
633 'uploader_url': 'https://twitter.com/Rizdraws',
634 'upload_date': '20220928',
635 'timestamp': 1664391723,
16bed382 636 'thumbnail': r're:^https?://.+\.jpg',
7a26ce26
SS
637 'like_count': int,
638 'repost_count': int,
639 'comment_count': int,
640 'age_limit': 18,
641 'tags': []
642 },
147e62fc 643 'skip': 'Requires authentication',
7a26ce26 644 }, {
7a26ce26
SS
645 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
646 'playlist_mincount': 2,
647 'info_dict': {
648 'id': '1395079556562706435',
649 'title': str,
650 'tags': [],
651 'uploader': str,
652 'like_count': int,
653 'upload_date': '20210519',
654 'age_limit': 0,
655 'repost_count': int,
147e62fc 656 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
7a26ce26
SS
657 'uploader_id': 'Srirachachau',
658 'comment_count': int,
659 'uploader_url': 'https://twitter.com/Srirachachau',
660 'timestamp': 1621447860,
661 },
662 }, {
7a26ce26
SS
663 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
664 'playlist_mincount': 2,
665 'info_dict': {
666 'id': '1578353380363501568',
667 'title': str,
668 'uploader_id': 'DavidToons_',
669 'repost_count': int,
670 'like_count': int,
671 'uploader': str,
672 'timestamp': 1665143744,
673 'uploader_url': 'https://twitter.com/DavidToons_',
147e62fc 674 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
7a26ce26
SS
675 'tags': [],
676 'comment_count': int,
677 'upload_date': '20221007',
678 'age_limit': 0,
679 },
680 }, {
681 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
682 'playlist_count': 2,
683 'info_dict': {
684 'id': '1578401165338976258',
685 'title': str,
686 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
687 'uploader': str,
688 'uploader_id': 'primevideouk',
689 'timestamp': 1665155137,
690 'upload_date': '20221007',
691 'age_limit': 0,
692 'uploader_url': 'https://twitter.com/primevideouk',
693 'comment_count': int,
694 'repost_count': int,
695 'like_count': int,
696 'tags': ['TheRingsOfPower'],
697 },
698 }, {
699 # Twitter Spaces
700 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
701 'info_dict': {
702 'id': '1lPJqmBeeNAJb',
703 'ext': 'm4a',
704 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
705 'uploader': r're:Monique Camarra.+?',
706 'uploader_id': 'MoniqueCamarra',
707 'live_status': 'was_live',
708 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
709 'timestamp': 1658407771464,
710 },
711 'add_ie': ['TwitterSpaces'],
712 'params': {'skip_download': 'm3u8'},
16bed382 713 }, {
714 # URL specifies video number but --yes-playlist
715 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
716 'playlist_mincount': 2,
717 'info_dict': {
718 'id': '1600649710662213632',
719 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
720 'timestamp': 1670459604.0,
721 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
722 'comment_count': int,
723 'uploader_id': 'CTVJLaidlaw',
724 'repost_count': int,
725 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
726 'upload_date': '20221208',
727 'age_limit': 0,
728 'uploader': 'Jocelyn Laidlaw',
729 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
730 'like_count': int,
731 },
732 }, {
733 # URL specifies video number and --no-playlist
734 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
735 'info_dict': {
736 'id': '1600649511827013632',
737 'ext': 'mp4',
147e62fc 738 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
16bed382 739 'thumbnail': r're:^https?://.+\.jpg',
740 'timestamp': 1670459604.0,
741 'uploader_id': 'CTVJLaidlaw',
742 'uploader': 'Jocelyn Laidlaw',
743 'repost_count': int,
744 'comment_count': int,
745 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
746 'duration': 102.226,
747 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
748 'display_id': '1600649710662213632',
749 'like_count': int,
147e62fc 750 'view_count': int,
16bed382 751 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
752 'upload_date': '20221208',
753 'age_limit': 0,
754 },
755 'params': {'noplaylist': True},
7543c9c9 756 }, {
757 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
758 # note the id different between extraction and url
759 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
760 'info_dict': {
761 'id': '1621117577354424321',
762 'display_id': '1621117700482416640',
763 'ext': 'mp4',
764 'title': '뽀 - 아 최우제 이동속도 봐',
765 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
766 'duration': 24.598,
767 'uploader': '뽀',
768 'uploader_id': 's2FAKER',
769 'uploader_url': 'https://twitter.com/s2FAKER',
770 'upload_date': '20230202',
771 'timestamp': 1675339553.0,
772 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
773 'age_limit': 18,
774 'tags': [],
775 'like_count': int,
776 'repost_count': int,
777 'comment_count': int,
147e62fc 778 'view_count': int,
7543c9c9 779 },
b6795fd3
SS
780 }, {
781 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
782 'info_dict': {
783 'id': '1599108643743473680',
784 'display_id': '1599108751385972737',
785 'ext': 'mp4',
786 'title': '\u06ea - \U0001F48B',
787 'uploader_url': 'https://twitter.com/hlo_again',
788 'like_count': int,
789 'uploader_id': 'hlo_again',
790 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
791 'repost_count': int,
792 'duration': 9.531,
793 'comment_count': int,
147e62fc 794 'view_count': int,
b6795fd3
SS
795 'upload_date': '20221203',
796 'age_limit': 0,
797 'timestamp': 1670092210.0,
798 'tags': [],
799 'uploader': '\u06ea',
800 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
801 },
802 'params': {'noplaylist': True},
803 }, {
b6795fd3
SS
804 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
805 'info_dict': {
806 'id': '1600009362759733248',
807 'display_id': '1600009574919962625',
808 'ext': 'mp4',
809 'uploader_url': 'https://twitter.com/MunTheShinobi',
810 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
811 'view_count': int,
812 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
813 'age_limit': 0,
147e62fc 814 'uploader': 'Mün The Shinobi',
b6795fd3
SS
815 'repost_count': int,
816 'upload_date': '20221206',
147e62fc 817 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
b6795fd3
SS
818 'comment_count': int,
819 'like_count': int,
820 'tags': [],
821 'uploader_id': 'MunTheShinobi',
822 'duration': 139.987,
823 'timestamp': 1670306984.0,
824 },
cf605226 825 }, {
147e62fc 826 # url to retweet id, legacy API
cf605226 827 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
828 'info_dict': {
829 'id': '1623274794488659969',
830 'display_id': '1623739803874349067',
831 'ext': 'mp4',
832 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
833 'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
834 'uploader': 'Johnny Bullets',
835 'uploader_id': 'Johnnybull3ts',
836 'uploader_url': 'https://twitter.com/Johnnybull3ts',
837 'age_limit': 0,
838 'tags': [],
839 'duration': 8.033,
840 'timestamp': 1675853859.0,
841 'upload_date': '20230208',
842 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
843 'like_count': int,
844 'repost_count': int,
845 'comment_count': int,
846 },
147e62fc 847 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}}},
82fb2357 848 }, {
849 # onion route
850 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
851 'only_matching': True,
18ca61c5
RA
852 }, {
853 # Twitch Clip Embed
854 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
855 'only_matching': True,
10a5091e
RA
856 }, {
857 # promo_video_website card
858 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
859 'only_matching': True,
00dd0cd5 860 }, {
861 # promo_video_convo card
862 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
863 'only_matching': True,
864 }, {
865 # appplayer card
866 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
867 'only_matching': True,
30a074c2 868 }, {
869 # video_direct_message card
870 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
871 'only_matching': True,
872 }, {
873 # poll2choice_video card
874 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
875 'only_matching': True,
876 }, {
877 # poll3choice_video card
878 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
879 'only_matching': True,
880 }, {
881 # poll4choice_video card
882 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
883 'only_matching': True,
cf5881fc 884 }]
f57f84f6 885
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL TweetDetail response into a legacy-style status dict.

    Looks up the timeline entry whose ``entryId`` is ``tweet-<twid>``, takes
    its ``legacy`` dict and attaches the ``user``, ``card`` and
    ``quoted_status`` sub-dicts under those keys. The ``legacy`` dict is
    updated in place and returned.

    Raises ExtractorError (or requests login for adult-content tombstones)
    when the entry is a tombstone.
    """
    entry_id = f'tweet-{twid}'
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, entry: entry['entryId'] == entry_id, 'content', 'itemContent',
        'tweet_results', 'result', ('tweet', None),
    )
    result = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        if cause and 'adult content' in cause:
            self.raise_login_required(cause)
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    status = result.get('legacy', {})
    nested_parts = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_parts)

    # The GraphQL card carries binding values as a list of key/value items,
    # while the rest of the extractor looks them up by key.
    card_bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
918
919 def _build_graphql_query(self, media_id):
920 return {
921 'variables': {
922 'focalTweetId': media_id,
923 'includePromotedContent': True,
924 'with_rux_injections': False,
925 'withBirdwatchNotes': True,
926 'withCommunity': True,
927 'withDownvotePerspective': False,
928 'withQuickPromoteEligibilityTweetFields': True,
929 'withReactionsMetadata': False,
930 'withReactionsPerspective': False,
931 'withSuperFollowsTweetFields': True,
932 'withSuperFollowsUserFields': True,
933 'withV2Timeline': True,
934 'withVoice': True,
935 },
936 'features': {
937 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
938 'interactive_text_enabled': True,
939 'responsive_web_edit_tweet_api_enabled': True,
940 'responsive_web_enhance_cards_enabled': True,
941 'responsive_web_graphql_timeline_navigation_enabled': False,
942 'responsive_web_text_conversations_enabled': False,
943 'responsive_web_uc_gql_enabled': True,
944 'standardized_nudges_misinfo': True,
945 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
946 'tweetypie_unmention_optimization_enabled': True,
947 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
948 'verified_phone_label_enabled': False,
949 'vibe_api_enabled': True,
950 },
951 }
952
def _real_extract(self, url):
    """Extract the video(s) attached to a single tweet.

    The status is fetched through the legacy REST API when the ``legacy_api``
    extractor argument is set and the user is not logged in, and through the
    GraphQL API otherwise. Media from the tweet, its quoted tweet and its
    card are turned into entries.

    Returns a single info dict, a playlist result when several entries were
    found, or a URL result pointing at the tweet's first expanded link when
    the tweet itself carries no video.

    Raises ExtractorError when a specific media index was requested in the
    URL but that item is missing or is not a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    if self._configuration_arg('legacy_api') and not self.is_logged_in:
        # Prefer the embedded retweeted status; the `None` path falls back
        # to the API response itself.
        status = traverse_obj(self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }), 'retweeted_status', None)
    else:
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    # The status may lack `full_text` (e.g. an empty dict when no matching
    # GraphQL entry was found); fall back to an empty string instead of
    # failing with a bare KeyError so the "no video" handling below is reached.
    title = description = (status.get('full_text') or '').replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the media-specific part of an info dict from one media object."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats = []
        subtitles = {}
        for variant in video_info.get('variants', []):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield partial info dicts (or URL results) described by a tweet card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The index in the URL is 1-based and counts every media item of the tweet
        desired_obj = traverse_obj(status, ('extended_entities', 'media', int(selected_index) - 1, {dict}))
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media: hand the tweet's first expanded link to another extractor
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
445d72b8
YCH
1144
1145
class TwitterAmplifyIE(TwitterBaseIE):
    # Extractor for amp.twimg.com video pages; formats come from the VMAP URL
    # advertised in the page's meta tags.
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Download the page and build the info dict from its ``twitter:*`` meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            """Return ``(width, height)`` from the ``twitter:<target>:*`` meta tags."""
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # The player dimensions are applied to the first format only; skip
        # this when nothing was extracted instead of raising IndexError.
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1201
1202
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    # Extractor for twitter.com/i/broadcasts/<id> pages
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's `type` query parameter is used as the format id
        query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1239
1240
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    # Extractor for twitter.com/i/spaces/<id> (audio spaces)
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased `state` of a space's metadata to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the ``variables``/``features`` payload for an AudioSpaceById query."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Fetch the space's metadata and, unless it is upcoming or not yet
        downloadable, its audio formats.

        Raises ExtractorError when the API returns no data for the space.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states yield None and fall through to extraction
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # `created_at` is in milliseconds (13-digit value); convert to seconds
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
        }
1333
1334
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (or the internal `tco:<code>` form) to their target
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host `t.co` matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate the final URL to another extractor."""
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # `tco:<code>` form: build the real short URL from the code
            shortcode = eid
            url = self._BASE_URL + shortcode
        # NOTE(review): the curl User-Agent presumably makes t.co answer with a
        # plain HTTP redirect - confirm before changing it.
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading warning-page prefix, keeping the target intact
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)