]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/twitter.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / twitter.py
CommitLineData
7a26ce26 1import json
23e7cba8 2import re
7a26ce26 3import urllib.error
23e7cba8
S
4
5from .common import InfoExtractor
13b2ae29 6from .periscope import PeriscopeBaseIE, PeriscopeIE
7a26ce26 7from ..compat import functools # isort: split
18ca61c5 8from ..compat import (
18ca61c5
RA
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12)
23e7cba8 13from ..utils import (
2edfd745 14 ExtractorError,
13b2ae29 15 dict_get,
23e7cba8 16 float_or_none,
13b2ae29 17 format_field,
cf5881fc 18 int_or_none,
13b2ae29
SS
19 make_archive_id,
20 str_or_none,
21 strip_or_none,
f1150b9e 22 traverse_obj,
7a26ce26 23 try_call,
2edfd745 24 try_get,
18ca61c5
RA
25 unified_timestamp,
26 update_url_query,
41d1cca3 27 url_or_none,
2edfd745 28 xpath_text,
23e7cba8
S
29)
30
31
class TwitterBaseIE(InfoExtractor):
    """Format helpers and API plumbing shared by the Twitter extractors."""

    _API_BASE = 'https://api.twitter.com/1.1/'
    _GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
    # Maps each bearer token to the guest token obtained for it (None until
    # one has been requested). _call_api tries the tokens in this order.
    _TOKENS = {
        'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA': None,
        'AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw': None,
    }
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'

    def _extract_variant_formats(self, variant, video_id):
        """Build the formats for a single media variant.

        `variant` is a mapping with a 'url' entry and, optionally, a
        'bitrate' or 'bit_rate' entry.

        Returns a ``(formats, subtitles)`` pair. Both are empty when the
        variant has no URL; HLS manifests ('.m3u8' in the URL) are expanded
        non-fatally; anything else becomes one direct 'http' format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # 1000 is passed as int_or_none's second argument; a zero or
            # unparsable bitrate collapses to None so that it is left out
            # of the format_id.
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and collect formats from its variants.

        Returns a ``(formats, subtitles)`` pair, empty when `vmap_url` is
        not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The attribute is unquoted in place so that the attrib mapping
            # can be handed to _extract_variant_formats as the variant.
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also consider the MediaFile URL unless a variant already covered
        # it. A missing MediaFile yields nothing, so it is skipped outright.
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url and video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width'/'height' on `a_format` from a '/<W>x<H>/' URL segment, if any."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    @functools.cached_property
    def is_logged_in(self):
        """Whether the cookies for the API base URL include an 'auth_token'."""
        return bool(self._get_cookies(self._API_BASE).get('auth_token'))

    def _call_api(self, path, video_id, query=None, graphql=False):
        """Request `path` from the REST API (or the GraphQL API) and return the JSON.

        Each bearer token in `_TOKENS` is tried in order. When not logged
        in, a guest token is activated per bearer token and cached in
        `_TOKENS`. If a request fails with HTTP 404, a warning is issued and
        the next token is tried; any other failure is re-raised at once, and
        a failure after a previous 404 re-raises that first 404 error.

        Raises ExtractorError when no guest token can be obtained, when no
        response was obtained, or when the response carries an 'errors'
        entry.
        """
        # None stands in for "no parameters" to avoid a shared mutable default
        query = {} if query is None else query

        cookies = self._get_cookies(self._API_BASE)
        headers = {}

        csrf_cookie = cookies.get('ct0')
        if csrf_cookie:
            headers['x-csrf-token'] = csrf_cookie.value

        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })

        result, last_error = None, None
        for bearer_token in self._TOKENS:
            headers['Authorization'] = f'Bearer {bearer_token}'

            if not self.is_logged_in:
                if not self._TOKENS[bearer_token]:
                    # Drop the guest token left over from the previous
                    # bearer token before activating a new one
                    headers.pop('x-guest-token', None)
                    guest_token_response = self._download_json(
                        self._API_BASE + 'guest/activate.json', video_id,
                        'Downloading guest token', data=b'', headers=headers)

                    self._TOKENS[bearer_token] = guest_token_response.get('guest_token')
                    if not self._TOKENS[bearer_token]:
                        raise ExtractorError('Could not retrieve guest token')
                headers['x-guest-token'] = self._TOKENS[bearer_token]

            try:
                allowed_status = {400, 403, 404} if graphql else {403}
                result = self._download_json(
                    (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
                    video_id, headers=headers, query=query, expected_status=allowed_status)
                break

            except ExtractorError as e:
                if last_error:
                    raise last_error
                elif not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code != 404:
                    raise
                last_error = e
                self.report_warning(
                    'Twitter API gave 404 response, retrying with deprecated token. '
                    'Only one media item can be extracted')

        # Validate before first use; an assert placed after `.get` could
        # never fire and is stripped under -O anyway.
        if result is None:
            if last_error:
                raise last_error
            raise ExtractorError('Twitter API returned an empty response')

        if result.get('errors'):
            error_message = ', '.join(set(traverse_obj(
                result, ('errors', ..., 'message'), expected_type=str))) or 'Unknown error'
            raise ExtractorError(f'Error(s) while querying api: {error_message}', expected=True)

        return result

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query parameters for `media_id`; subclasses must override."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL `endpoint` and return the 'data' member of the response.

        Each top-level value from `_build_graphql_query` is serialised to
        compact JSON and sent as a query parameter.
        """
        data = self._build_graphql_query(media_id)
        query = {key: json.dumps(value, separators=(',', ':')) for key, value in data.items()}
        return traverse_obj(self._call_api(endpoint, media_id, query=query, graphql=True), 'data')
18ca61c5
RA
158
159
class TwitterCardIE(InfoExtractor):
    """Resolve Twitter card and embedded-video URLs by delegating to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070131976392705',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
            'age_limit': 0,
            'comment_count': int,
            'tags': [],
            'repost_count': int,
            'like_count': int,
            'display_id': '560070183650213889',
            'uploader_url': 'https://twitter.com/Twitter',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
            'uploader_url': 'https://twitter.com/NASA',
            'age_limit': 0,
            'comment_count': int,
            'like_count': int,
            'repost_count': int,
            'tags': ['PlutoFlyby'],
        },
        'params': {'format': '[protocol=https]'},
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
            'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
            'channel_follower_count': int,
            'chapters': 'count:8',
            'uploader_url': 'http://www.youtube.com/user/omgubuntu',
            'duration': 138,
            'categories': ['Film & Animation'],
            'age_limit': 0,
            'comment_count': int,
            'availability': 'public',
            'like_count': int,
            'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
            'view_count': int,
            'tags': 'count:12',
            'channel': 'OMG! UBUNTU!',
            'playable_in_embed': True,
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
            'alt_title': 'Vine by ArsenalTerje',
            'comment_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://[^?#]+\.jpg',
            'view_count': int,
            'repost_count': int,
        },
        'add_ie': ['Vine'],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Hand the numeric ID from `url` to TwitterIE as a 'statuses/<id>' URL."""
        tweet_id = self._match_id(url)
        statuses_url = f'https://twitter.com/statuses/{tweet_id}'
        return self.url_result(statuses_url, TwitterIE.ie_key(), tweet_id)
c8398a9b 284
03879ff0 285
18ca61c5 286class TwitterIE(TwitterBaseIE):
014e8803 287 IE_NAME = 'twitter'
18ca61c5 288 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'
f57f84f6 289
cf5881fc 290 _TESTS = [{
48aae2d2 291 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
f57f84f6 292 'info_dict': {
13b2ae29
SS
293 'id': '643211870443208704',
294 'display_id': '643211948184596480',
f57f84f6 295 'ext': 'mp4',
575036b4 296 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
ec85ded8 297 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 298 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
48aae2d2
YCH
299 'uploader': 'FREE THE NIPPLE',
300 'uploader_id': 'freethenipple',
3b65a6fb 301 'duration': 12.922,
18ca61c5
RA
302 'timestamp': 1442188653,
303 'upload_date': '20150913',
13b2ae29
SS
304 'uploader_url': 'https://twitter.com/freethenipple',
305 'comment_count': int,
306 'repost_count': int,
307 'like_count': int,
308 'tags': [],
309 'age_limit': 18,
f57f84f6 310 },
cf5881fc
YCH
311 }, {
312 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
313 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
314 'info_dict': {
315 'id': '657991469417025536',
316 'ext': 'mp4',
317 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
318 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
ec85ded8 319 'thumbnail': r're:^https?://.*\.png',
cf5881fc
YCH
320 'uploader': 'Gifs',
321 'uploader_id': 'giphz',
322 },
7efc1c2b 323 'expected_warnings': ['height', 'width'],
fc0a45fa 324 'skip': 'Account suspended',
b703ebee
JMF
325 }, {
326 'url': 'https://twitter.com/starwars/status/665052190608723968',
b703ebee
JMF
327 'info_dict': {
328 'id': '665052190608723968',
13b2ae29 329 'display_id': '665052190608723968',
b703ebee 330 'ext': 'mp4',
7a26ce26 331 'title': 'md5:3f57ab5d35116537a2ae7345cd0060d8',
18ca61c5 332 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
b703ebee 333 'uploader_id': 'starwars',
7a26ce26 334 'uploader': r're:Star Wars.*',
18ca61c5
RA
335 'timestamp': 1447395772,
336 'upload_date': '20151113',
13b2ae29
SS
337 'uploader_url': 'https://twitter.com/starwars',
338 'comment_count': int,
339 'repost_count': int,
340 'like_count': int,
341 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
342 'age_limit': 0,
b703ebee 343 },
0ae937a7
YCH
344 }, {
345 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
346 'info_dict': {
347 'id': '705235433198714880',
348 'ext': 'mp4',
18ca61c5
RA
349 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
350 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
0ae937a7
YCH
351 'uploader_id': 'BTNBrentYarina',
352 'uploader': 'Brent Yarina',
18ca61c5
RA
353 'timestamp': 1456976204,
354 'upload_date': '20160303',
13b2ae29
SS
355 'uploader_url': 'https://twitter.com/BTNBrentYarina',
356 'comment_count': int,
357 'repost_count': int,
358 'like_count': int,
359 'tags': [],
360 'age_limit': 0,
0ae937a7
YCH
361 },
362 'params': {
363 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
364 # Test case of TwitterCardIE
365 'skip_download': True,
366 },
03879ff0
YCH
367 }, {
368 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
03879ff0 369 'info_dict': {
13b2ae29
SS
370 'id': '700207414000242688',
371 'display_id': '700207533655363584',
03879ff0 372 'ext': 'mp4',
13b2ae29 373 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
18ca61c5 374 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
ec85ded8 375 'thumbnail': r're:^https?://.*\.jpg',
13b2ae29
SS
376 'uploader': 'jaydin donte geer',
377 'uploader_id': 'jaydingeer',
3b65a6fb 378 'duration': 30.0,
18ca61c5
RA
379 'timestamp': 1455777459,
380 'upload_date': '20160218',
13b2ae29
SS
381 'uploader_url': 'https://twitter.com/jaydingeer',
382 'comment_count': int,
383 'repost_count': int,
384 'like_count': int,
385 'tags': ['Damndaniel'],
386 'age_limit': 0,
03879ff0 387 },
395fd4b0
YCH
388 }, {
389 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
390 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
391 'info_dict': {
392 'id': 'MIOxnrUteUd',
393 'ext': 'mp4',
18ca61c5
RA
394 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
395 'uploader': 'TAKUMA',
396 'uploader_id': '1004126642786242560',
3615bfe1 397 'timestamp': 1402826626,
395fd4b0 398 'upload_date': '20140615',
13b2ae29
SS
399 'thumbnail': r're:^https?://.*\.jpg',
400 'alt_title': 'Vine by TAKUMA',
401 'comment_count': int,
402 'repost_count': int,
403 'like_count': int,
404 'view_count': int,
395fd4b0
YCH
405 },
406 'add_ie': ['Vine'],
36b7d9db
YCH
407 }, {
408 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
36b7d9db 409 'info_dict': {
13b2ae29
SS
410 'id': '717462543795523584',
411 'display_id': '719944021058060289',
36b7d9db
YCH
412 'ext': 'mp4',
413 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
18ca61c5
RA
414 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
415 'uploader_id': 'CaptainAmerica',
36b7d9db 416 'uploader': 'Captain America',
3b65a6fb 417 'duration': 3.17,
18ca61c5
RA
418 'timestamp': 1460483005,
419 'upload_date': '20160412',
13b2ae29
SS
420 'uploader_url': 'https://twitter.com/CaptainAmerica',
421 'thumbnail': r're:^https?://.*\.jpg',
422 'comment_count': int,
423 'repost_count': int,
424 'like_count': int,
425 'tags': [],
426 'age_limit': 0,
36b7d9db 427 },
f0bc5a86
YCH
428 }, {
429 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
430 'info_dict': {
431 'id': '1zqKVVlkqLaKB',
432 'ext': 'mp4',
18ca61c5 433 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
f0bc5a86 434 'upload_date': '20160923',
18ca61c5
RA
435 'uploader_id': '1PmKqpJdOJQoY',
436 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
f0bc5a86 437 'timestamp': 1474613214,
13b2ae29 438 'thumbnail': r're:^https?://.*\.jpg',
f0bc5a86
YCH
439 },
440 'add_ie': ['Periscope'],
2edfd745
YCH
441 }, {
442 # has mp4 formats via mobile API
443 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
444 'info_dict': {
445 'id': '852138619213144067',
446 'ext': 'mp4',
447 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
18ca61c5 448 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
2edfd745
YCH
449 'uploader': 'عالم الأخبار',
450 'uploader_id': 'news_al3alm',
3b65a6fb 451 'duration': 277.4,
18ca61c5
RA
452 'timestamp': 1492000653,
453 'upload_date': '20170412',
2edfd745 454 },
00dd0cd5 455 'skip': 'Account suspended',
5c1452e8
GF
456 }, {
457 'url': 'https://twitter.com/i/web/status/910031516746514432',
458 'info_dict': {
13b2ae29
SS
459 'id': '910030238373089285',
460 'display_id': '910031516746514432',
5c1452e8
GF
461 'ext': 'mp4',
462 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
463 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 464 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
5c1452e8
GF
465 'uploader': 'Préfet de Guadeloupe',
466 'uploader_id': 'Prefet971',
467 'duration': 47.48,
18ca61c5
RA
468 'timestamp': 1505803395,
469 'upload_date': '20170919',
13b2ae29
SS
470 'uploader_url': 'https://twitter.com/Prefet971',
471 'comment_count': int,
472 'repost_count': int,
473 'like_count': int,
474 'tags': ['Maria'],
475 'age_limit': 0,
5c1452e8
GF
476 },
477 'params': {
478 'skip_download': True, # requires ffmpeg
479 },
2593725a
S
480 }, {
481 # card via api.twitter.com/1.1/videos/tweet/config
482 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
483 'info_dict': {
13b2ae29
SS
484 'id': '1001551417340022785',
485 'display_id': '1001551623938805763',
2593725a
S
486 'ext': 'mp4',
487 'title': 're:.*?Shep is on a roll today.*?',
488 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 489 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
2593725a
S
490 'uploader': 'Lis Power',
491 'uploader_id': 'LisPower1',
492 'duration': 111.278,
18ca61c5
RA
493 'timestamp': 1527623489,
494 'upload_date': '20180529',
13b2ae29
SS
495 'uploader_url': 'https://twitter.com/LisPower1',
496 'comment_count': int,
497 'repost_count': int,
498 'like_count': int,
499 'tags': [],
500 'age_limit': 0,
2593725a
S
501 },
502 'params': {
503 'skip_download': True, # requires ffmpeg
504 },
b7ef93f0
S
505 }, {
506 'url': 'https://twitter.com/foobar/status/1087791357756956680',
507 'info_dict': {
13b2ae29
SS
508 'id': '1087791272830607360',
509 'display_id': '1087791357756956680',
b7ef93f0
S
510 'ext': 'mp4',
511 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
512 'thumbnail': r're:^https?://.*\.jpg',
18ca61c5 513 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
b7ef93f0
S
514 'uploader': 'Twitter',
515 'uploader_id': 'Twitter',
516 'duration': 61.567,
18ca61c5
RA
517 'timestamp': 1548184644,
518 'upload_date': '20190122',
13b2ae29
SS
519 'uploader_url': 'https://twitter.com/Twitter',
520 'comment_count': int,
521 'repost_count': int,
522 'like_count': int,
523 'tags': [],
524 'age_limit': 0,
18ca61c5
RA
525 },
526 }, {
527 # not available in Periscope
528 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
529 'info_dict': {
530 'id': '1vOGwqejwoWxB',
531 'ext': 'mp4',
532 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
533 'uploader': 'Vivi',
534 'uploader_id': '1eVjYOLGkGrQL',
13b2ae29
SS
535 'thumbnail': r're:^https?://.*\.jpg',
536 'tags': ['EduTECH2019'],
537 'view_count': int,
b7ef93f0 538 },
18ca61c5 539 'add_ie': ['TwitterBroadcast'],
30a074c2 540 }, {
541 # unified card
542 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
543 'info_dict': {
13b2ae29
SS
544 'id': '1349774757969989634',
545 'display_id': '1349794411333394432',
30a074c2 546 'ext': 'mp4',
547 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
548 'thumbnail': r're:^https?://.*\.jpg',
549 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
550 'uploader': 'Brooklyn Nets',
551 'uploader_id': 'BrooklynNets',
552 'duration': 324.484,
553 'timestamp': 1610651040,
554 'upload_date': '20210114',
13b2ae29
SS
555 'uploader_url': 'https://twitter.com/BrooklynNets',
556 'comment_count': int,
557 'repost_count': int,
558 'like_count': int,
559 'tags': [],
560 'age_limit': 0,
30a074c2 561 },
562 'params': {
563 'skip_download': True,
564 },
13b2ae29
SS
565 }, {
566 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
567 'info_dict': {
568 'id': '1577855447914409984',
569 'display_id': '1577855540407197696',
570 'ext': 'mp4',
571 'title': 'oshtru \U0001faac\U0001f47d - gm \u2728\ufe0f now I can post image and video. nice update.',
572 'description': 'gm \u2728\ufe0f now I can post image and video. nice update. https://t.co/cG7XgiINOm',
573 'upload_date': '20221006',
574 'uploader': 'oshtru \U0001faac\U0001f47d',
575 'uploader_id': 'oshtru',
576 'uploader_url': 'https://twitter.com/oshtru',
577 'thumbnail': r're:^https?://.*\.jpg',
578 'duration': 30.03,
7a26ce26 579 'timestamp': 1665025050,
13b2ae29
SS
580 'comment_count': int,
581 'repost_count': int,
582 'like_count': int,
583 'tags': [],
584 'age_limit': 0,
585 },
586 'params': {'skip_download': True},
587 }, {
588 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
589 'info_dict': {
590 'id': '1577719286659006464',
591 'title': 'Ultima | #\u0432\u029f\u043c - Test',
592 'description': 'Test https://t.co/Y3KEZD7Dad',
593 'uploader': 'Ultima | #\u0432\u029f\u043c',
594 'uploader_id': 'UltimaShadowX',
595 'uploader_url': 'https://twitter.com/UltimaShadowX',
596 'upload_date': '20221005',
7a26ce26 597 'timestamp': 1664992565,
13b2ae29
SS
598 'comment_count': int,
599 'repost_count': int,
600 'like_count': int,
601 'tags': [],
602 'age_limit': 0,
603 },
604 'playlist_count': 4,
605 'params': {'skip_download': True},
7a26ce26
SS
606 }, {
607 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
608 'info_dict': {
609 'id': '1575559336759263233',
610 'display_id': '1575560063510810624',
611 'ext': 'mp4',
612 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
613 'thumbnail': r're:^https?://.*\.jpg',
614 'description': 'md5:95aea692fda36a12081b9629b02daa92',
615 'uploader': 'Max Olson',
616 'uploader_id': 'MesoMax919',
617 'uploader_url': 'https://twitter.com/MesoMax919',
618 'duration': 21.321,
619 'timestamp': 1664477766,
620 'upload_date': '20220929',
621 'comment_count': int,
622 'repost_count': int,
623 'like_count': int,
624 'tags': ['HurricaneIan'],
625 'age_limit': 0,
626 },
627 }, {
628 # Adult content, uses old token
629 # Fails if not logged in (GraphQL)
630 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
631 'info_dict': {
632 'id': '1575199163847000068',
633 'display_id': '1575199173472927762',
634 'ext': 'mp4',
635 'title': str,
636 'description': str,
637 'uploader': str,
638 'uploader_id': 'Rizdraws',
639 'uploader_url': 'https://twitter.com/Rizdraws',
640 'upload_date': '20220928',
641 'timestamp': 1664391723,
642 'thumbnail': 're:^https?://.*\\.jpg',
643 'like_count': int,
644 'repost_count': int,
645 'comment_count': int,
646 'age_limit': 18,
647 'tags': []
648 },
649 'expected_warnings': ['404'],
650 }, {
651 # Description is missing one https://t.co url (GraphQL)
652 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
653 'playlist_mincount': 2,
654 'info_dict': {
655 'id': '1395079556562706435',
656 'title': str,
657 'tags': [],
658 'uploader': str,
659 'like_count': int,
660 'upload_date': '20210519',
661 'age_limit': 0,
662 'repost_count': int,
663 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7',
664 'uploader_id': 'Srirachachau',
665 'comment_count': int,
666 'uploader_url': 'https://twitter.com/Srirachachau',
667 'timestamp': 1621447860,
668 },
669 }, {
670 # Description is missing one https://t.co url (GraphQL)
671 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
672 'playlist_mincount': 2,
673 'info_dict': {
674 'id': '1578353380363501568',
675 'title': str,
676 'uploader_id': 'DavidToons_',
677 'repost_count': int,
678 'like_count': int,
679 'uploader': str,
680 'timestamp': 1665143744,
681 'uploader_url': 'https://twitter.com/DavidToons_',
682 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w',
683 'tags': [],
684 'comment_count': int,
685 'upload_date': '20221007',
686 'age_limit': 0,
687 },
688 }, {
689 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
690 'playlist_count': 2,
691 'info_dict': {
692 'id': '1578401165338976258',
693 'title': str,
694 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
695 'uploader': str,
696 'uploader_id': 'primevideouk',
697 'timestamp': 1665155137,
698 'upload_date': '20221007',
699 'age_limit': 0,
700 'uploader_url': 'https://twitter.com/primevideouk',
701 'comment_count': int,
702 'repost_count': int,
703 'like_count': int,
704 'tags': ['TheRingsOfPower'],
705 },
706 }, {
707 # Twitter Spaces
708 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
709 'info_dict': {
710 'id': '1lPJqmBeeNAJb',
711 'ext': 'm4a',
712 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
713 'uploader': r're:Monique Camarra.+?',
714 'uploader_id': 'MoniqueCamarra',
715 'live_status': 'was_live',
716 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
717 'timestamp': 1658407771464,
718 },
719 'add_ie': ['TwitterSpaces'],
720 'params': {'skip_download': 'm3u8'},
82fb2357 721 }, {
722 # onion route
723 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
724 'only_matching': True,
18ca61c5
RA
725 }, {
726 # Twitch Clip Embed
727 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
728 'only_matching': True,
10a5091e
RA
729 }, {
730 # promo_video_website card
731 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
732 'only_matching': True,
00dd0cd5 733 }, {
734 # promo_video_convo card
735 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
736 'only_matching': True,
737 }, {
738 # appplayer card
739 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
740 'only_matching': True,
30a074c2 741 }, {
742 # video_direct_message card
743 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
744 'only_matching': True,
745 }, {
746 # poll2choice_video card
747 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
748 'only_matching': True,
749 }, {
750 # poll3choice_video card
751 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
752 'only_matching': True,
753 }, {
754 # poll4choice_video card
755 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
756 'only_matching': True,
cf5881fc 757 }]
f57f84f6 758
7a26ce26
SS
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL TweetDetail payload into the legacy status layout.

    Looks up the conversation entry whose entryId is 'tweet-<twid>' and
    returns its 'legacy' dict, extended with 'user', 'card' and
    'quoted_status' entries where those are present as dicts.

    Raises ExtractorError (expected) if the entry is a tombstone.
    """
    tweet_path = (
        'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
        lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
        'tweet_results', 'result'
    )
    tweet = traverse_obj(data, tweet_path, expected_type=dict, default={}, get_all=False)

    if 'tombstone' in tweet:
        reason = traverse_obj(tweet, ('tombstone', 'text', 'text'), expected_type=str)
        raise ExtractorError(f'Twitter API says: {reason or "Unknown error"}', expected=True)

    # Where each legacy-style key lives inside the GraphQL result
    nested_paths = {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }
    legacy = tweet.get('legacy', {})
    legacy.update(traverse_obj(tweet, nested_paths, expected_type=dict, default={}))

    # extra transformation is needed since result does not match legacy format:
    # the card's binding_values entries of {'key': ..., 'value': ...} are
    # collapsed into a single key -> value mapping
    card_bindings = {}
    for entry in traverse_obj(legacy, ('card', 'binding_values', ...), expected_type=dict):
        card_bindings[entry.get('key')] = entry.get('value')
    if card_bindings:
        legacy['card']['binding_values'] = card_bindings

    return legacy
786
787 def _build_graphql_query(self, media_id):
788 return {
789 'variables': {
790 'focalTweetId': media_id,
791 'includePromotedContent': True,
792 'with_rux_injections': False,
793 'withBirdwatchNotes': True,
794 'withCommunity': True,
795 'withDownvotePerspective': False,
796 'withQuickPromoteEligibilityTweetFields': True,
797 'withReactionsMetadata': False,
798 'withReactionsPerspective': False,
799 'withSuperFollowsTweetFields': True,
800 'withSuperFollowsUserFields': True,
801 'withV2Timeline': True,
802 'withVoice': True,
803 },
804 'features': {
805 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
806 'interactive_text_enabled': True,
807 'responsive_web_edit_tweet_api_enabled': True,
808 'responsive_web_enhance_cards_enabled': True,
809 'responsive_web_graphql_timeline_navigation_enabled': False,
810 'responsive_web_text_conversations_enabled': False,
811 'responsive_web_uc_gql_enabled': True,
812 'standardized_nudges_misinfo': True,
813 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
814 'tweetypie_unmention_optimization_enabled': True,
815 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
816 'verified_phone_label_enabled': False,
817 'vibe_api_enabled': True,
818 },
819 }
820
def _real_extract(self, url):
    """Extract every video (or linked card) attached to a tweet.

    The tweet status is fetched through the GraphQL API when logged in
    or when the ``force_graphql`` extractor argument is set, and through
    the legacy ``statuses/show`` endpoint otherwise.  Videos from the
    tweet (and its quoted status) come first, followed by whatever the
    tweet's card yields.  A single result is returned as-is; several
    results are returned as a playlist with numbered titles.

    Raises ExtractorError when nothing could be extracted and the tweet
    has no usable expanded URL to delegate to.
    """
    twid = self._match_id(url)

    if not (self.is_logged_in or self._configuration_arg('force_graphql')):
        status = self._call_api(f'statuses/show/{twid}.json', twid, {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        })
    else:
        self.write_debug(f'Using GraphQL API (Auth = {self.is_logged_in})')
        result = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        status = self._graphql_to_legacy(result, twid)

    description = status['full_text'].replace('\n', ' ')
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', description)

    user = status.get('user') or {}
    uploader = user.get('name')
    uploader_id = user.get('screen_name')
    if uploader:
        title = f'{uploader} - {title}'

    hashtags = try_get(status, lambda x: x['entities']['hashtags'], list) or []
    # Keep only hashtags that actually carry text
    tags = [text for text in (hashtag.get('text') for hashtag in hashtags) if text]

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': tags,
    }

    def extract_from_video_info(media):
        """Build the formats/subtitles/thumbnails of one media entity."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')
        video_info = media.get('video_info') or {}

        formats, subtitles = [], {}
        for variant in video_info.get('variants', []):
            variant_formats, variant_subs = self._extract_variant_formats(variant, twid)
            formats.extend(variant_formats)
            subtitles = self._merge_subtitles(subtitles, variant_subs)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # Every declared size, followed by the original-size image
            named_sizes = [
                *media.get('sizes', {}).items(),
                ('orig', media.get('original_info') or {}),
            ]
            thumbnails = [{
                'id': name,
                'url': update_url_query(media_url, {'name': name}),
                'width': int_or_none(size.get('w') or size.get('width')),
                'height': int_or_none(size.get('h') or size.get('height')),
            } for name, size in named_sizes]

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': float_or_none(video_info.get('duration_millis'), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        """Yield the entries described by the tweet's card, if any."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        bindings = card['binding_values']

        def binding(key):
            holder = bindings.get(key) or {}
            return try_get(holder, lambda x: x[x['type'].lower() + '_value'])

        def url_entry(target, ie=None):
            entry = {'_type': 'url', 'url': target}
            if ie is not None:
                entry['ie_key'] = ie.ie_key()
            return entry

        card_name = card['name'].rsplit(':', 1)[-1]

        if card_name == 'player':
            yield url_entry(binding('player_url'))
            return
        if card_name == 'periscope_broadcast':
            yield url_entry(binding('url') or binding('player_url'), PeriscopeIE)
            return
        if card_name == 'broadcast':
            yield url_entry(binding('broadcast_url'), TwitterBroadcastIE)
            return
        if card_name == 'audiospace':
            yield url_entry(f'https://twitter.com/i/spaces/{binding("id")}', TwitterSpacesIE)
            return
        if card_name == 'summary':
            yield url_entry(binding('card_url'))
            return
        if card_name == 'unified_card':
            unified_card = self._parse_json(binding('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
            return

        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        if card_name == 'amplify':
            vmap_key, content_key = 'amplify_url_vmap', 'amplify_content_id'
        else:
            vmap_key, content_key = 'player_stream_url', 'player_content_id'
        vmap_url = binding(vmap_key)
        content_id = binding(content_key)
        formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

        thumbnails = []
        for suffix in ('_small', '', '_large', '_x_large', '_original'):
            image = binding('player_image' + suffix) or {}
            image_url = image.get('url')
            if image_url and '/player-placeholder' not in image_url:
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        yield {
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': int_or_none(binding('content_duration_seconds')),
        }

    # Non-photo media of the tweet itself and of its quoted status
    media_path = ((None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo')
    videos = map(extract_from_video_info, traverse_obj(status, media_path, expected_type=dict))
    cards = extract_from_card_info(status.get('card'))
    # Both iterables are lazy; videos are consumed before cards
    entries = [{**info, **data, 'display_id': twid} for data in (*videos, *cards)]

    if not entries:
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if expanded_url and expanded_url != url:
            return self.url_result(expanded_url, display_id=twid, **info)
        raise ExtractorError('No video could be found in this tweet', expected=True)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) > 1:
        for position, entry in enumerate(entries, 1):
            entry['title'] += f' #{position}'
        return self.playlist_result(entries, **info)

    return entries[0]
445d72b8
YCH
999
1000
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for videos hosted on amp.twimg.com (Twitter Amplify)."""

    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Read the VMAP URL and thumbnail from the page's meta tags.

        Formats come from the ``twitter:amplify:vmap`` meta tag; the
        ``twitter:player`` dimensions are applied to the first format,
        and the ``twitter:image`` dimensions to the thumbnail.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Returns (width, height) from the twitter:<target>:* meta tags
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # Guard against an empty format list: indexing it unconditionally
        # would raise IndexError instead of returning a result without formats
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
18ca61c5
RA
1056
1057
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> pages."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, then resolve and extract its HLS stream.

        Raises a geo-restriction error when the playback URL points at
        the ``geoblocked`` stream.
        """
        broadcast_id = self._match_id(url)
        shown = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = shown['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playback URL's "type" query parameter is used as the format id
        playback_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playback_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
86b868c6
U
1094
1095
7a26ce26
SS
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Twitter Spaces (twitter.com/i/spaces/<id>)."""

    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
    _TWITTER_GRAPHQL = 'https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased space state to the reported live_status
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL query payload for the space *space_id*."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Extract the audio stream and metadata of a Twitter Space.

        No formats are extracted for spaces that have not started yet or
        that ended but are not downloadable yet.  Raises ExtractorError
        when the API returns no space data.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            media_key = metadata['media_key']
            source = self._call_api(
                f'live_video_stream/status/{media_key}', media_key)['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                media_key, 'm4a', 'm3u8', live=live_status == 'is_live')
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        creator = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator, 'screen_name')),
            'live_status': live_status,
            # created_at is in milliseconds (e.g. 1659877956397);
            # scale it down to a UNIX timestamp in seconds
            'timestamp': int_or_none(metadata.get('created_at'), 1000),
            'formats': formats,
        }
1188
1189
86b868c6
U
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve t.co short links (or ``tco:<code>``) to their target URL."""

    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link's redirect and hand the target URL on.

        When the redirect lands on the unsafe-link warning page, that
        page's URL prefix is stripped so that the rest is returned.
        """
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # "tco:<code>" form: rebuild the real short URL
            shortcode = eid
            url = self._BASE_URL + shortcode
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading prefix, never later occurrences
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)