5 from .common
import InfoExtractor
6 from .periscope
import PeriscopeBaseIE
, PeriscopeIE
7 from ..compat
import functools
# isort: split
10 compat_urllib_parse_unquote
,
11 compat_urllib_parse_urlparse
,
13 from ..networking
.exceptions
import HTTPError
35 class TwitterBaseIE(InfoExtractor
):
36 _NETRC_MACHINE
= 'twitter'
37 _API_BASE
= 'https://api.x.com/1.1/'
38 _GRAPHQL_API_BASE
= 'https://x.com/i/api/graphql/'
39 _BASE_REGEX
= r
'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
40 _AUTH
= 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
41 _LEGACY_AUTH
= 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
44 _LOGIN_INIT_DATA
= json
.dumps({
47 'debug_overrides': {},
56 'app_download_cta': 1,
57 'check_logged_in_account': 1,
58 'choice_selection': 3,
59 'contacts_live_sync_permission_prompt': 0,
61 'email_verification': 2,
71 'in_app_notification': 1,
73 'js_instrumentation': 1,
75 'notifications_permission_prompt': 2,
77 'open_home_timeline': 1,
79 'phone_verification': 4,
88 'tweet_selection_urt': 1,
91 'user_recommendations_list': 4,
92 'user_recommendations_urt': 1,
96 }, separators
=(',', ':')).encode()
98 def _extract_variant_formats(self
, variant
, video_id
):
99 variant_url
= variant
.get('url')
102 elif '.m3u8' in variant_url
:
103 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
104 variant_url
, video_id
, 'mp4', 'm3u8_native',
105 m3u8_id
='hls', fatal
=False)
106 for f
in traverse_obj(fmts
, lambda _
, v
: v
['vcodec'] == 'none' and v
.get('tbr') is None):
107 if mobj
:= re
.match(r
'hls-[Aa]udio-(?P<bitrate>\d{4,})', f
['format_id']):
108 f
['tbr'] = int_or_none(mobj
.group('bitrate'), 1000)
111 tbr
= int_or_none(dict_get(variant
, ('bitrate', 'bit_rate')), 1000) or None
114 'format_id': 'http' + ('-%d' % tbr
if tbr
else ''),
117 self
._search
_dimensions
_in
_video
_url
(f
, variant_url
)
120 def _extract_formats_from_vmap_url(self
, vmap_url
, video_id
):
121 vmap_url
= url_or_none(vmap_url
)
124 vmap_data
= self
._download
_xml
(vmap_url
, video_id
)
128 for video_variant
in vmap_data
.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
129 video_variant
.attrib
['url'] = compat_urllib_parse_unquote(
130 video_variant
.attrib
['url'])
131 urls
.append(video_variant
.attrib
['url'])
132 fmts
, subs
= self
._extract
_variant
_formats
(
133 video_variant
.attrib
, video_id
)
135 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
136 video_url
= strip_or_none(xpath_text(vmap_data
, './/MediaFile'))
137 if video_url
not in urls
:
138 fmts
, subs
= self
._extract
_variant
_formats
({'url': video_url}
, video_id
)
140 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
141 return formats
, subtitles
144 def _search_dimensions_in_video_url(a_format
, video_url
):
145 m
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
)
148 'width': int(m
.group('width')),
149 'height': int(m
.group('height')),
def is_logged_in(self):
    """Report whether an ``auth_token`` cookie exists for the API host.

    Fetches the cookies stored for ``self._API_BASE`` and returns ``True``
    when a truthy ``auth_token`` entry is present, ``False`` otherwise.

    NOTE(review): the call sites visible in this file read
    ``self.is_logged_in`` without calling it, so this is presumably exposed
    as a property -- confirm the decorator is present above this definition.
    """
    api_cookies = self._get_cookies(self._API_BASE)
    auth_cookie = api_cookies.get('auth_token')
    return bool(auth_cookie)
156 # XXX: Temporary workaround until twitter.com => x.com migration is completed
def _real_initialize(self):
    """Fall back to the twitter.com API hosts for accounts not yet on x.com.

    Nothing is changed when the user is already logged in against the
    current API host, or when no ``auth_token`` cookie is stored for
    ``https://twitter.com/``. Otherwise ``_API_BASE`` and
    ``_GRAPHQL_API_BASE`` are reassigned on ``TwitterBaseIE`` itself (a
    class-level assignment, not a per-instance one).
    """
    # Guard clause: without this early return the fallback below would run
    # for exactly the users it is NOT meant for.
    if self.is_logged_in or not self._get_cookies('https://twitter.com/').get('auth_token'):
        return
    # User has not yet been migrated to x.com and has passed twitter.com cookies
    TwitterBaseIE._API_BASE = 'https://api.twitter.com/1.1/'
    TwitterBaseIE._GRAPHQL_API_BASE = 'https://twitter.com/i/api/graphql/'
164 @functools.cached_property
165 def _selected_api(self
):
166 return self
._configuration
_arg
('api', ['graphql'], ie_key
='Twitter')[0]
def _fetch_guest_token(self, display_id):
    """Request a guest token from the ``guest/activate.json`` endpoint.

    Args:
        display_id: Identifier handed to the downloader for this request.
            May be ``None`` (the login flow passes ``None``); the legacy
            bearer headers are only requested when it is truthy and the
            selected API is ``'legacy'``.

    Returns:
        The ``guest_token`` string from the JSON response.

    Raises:
        ExtractorError: If the response carries no string ``guest_token``.
    """
    use_legacy = display_id and self._selected_api == 'legacy'
    activation = self._download_json(
        f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token',
        data=b'', headers=self._set_base_headers(legacy=use_legacy))
    guest_token = traverse_obj(activation, ('guest_token', {str}))
    # Only fail when the token is actually missing; callers rely on the
    # returned value for the 'x-guest-token' header.
    if not guest_token:
        raise ExtractorError('Could not retrieve guest token')
    return guest_token
177 def _set_base_headers(self
, legacy
=False):
178 bearer_token
= self
._LEGACY
_AUTH
if legacy
and not self
.is_logged_in
else self
._AUTH
180 'Authorization': f
'Bearer {bearer_token}',
181 'x-csrf-token': try_call(lambda: self
._get
_cookies
(self
._API
_BASE
)['ct0'].value
),
def _call_login_api(self, note, headers, query={}, data=None):  # noqa: B006 - default is never mutated
    """Send one step of the login flow to ``onboarding/task.json``.

    Args:
        note: Progress message passed to the downloader.
        headers: HTTP headers for the request.
        query: Query-string parameters (read-only; defaults to empty).
        data: Encoded request body, or ``None``.

    Returns:
        The ``subtask_id`` of the first subtask listed in the response,
        i.e. the next step the login loop has to handle.

    Raises:
        ExtractorError: If the API reports an error message, the response
            status is not ``'success'``, or no next subtask is returned.
        KeyError: If the response has no ``flow_token`` entry.

    Side effects:
        Stores the response's ``flow_token`` on ``self._flow_token``.
    """
    # HTTP 400 is tolerated so the JSON error payload can be inspected below.
    response = self._download_json(
        f'{self._API_BASE}onboarding/task.json', None, note,
        headers=headers, query=query, data=data, expected_status=400)

    error = traverse_obj(response, ('errors', 0, 'message', {str}))
    if error:
        raise ExtractorError(f'Login failed, Twitter API says: {error}', expected=True)
    elif traverse_obj(response, 'status') != 'success':
        raise ExtractorError('Login was unsuccessful')

    subtask = traverse_obj(
        response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
    if not subtask:
        raise ExtractorError('Twitter API did not return next login subtask')

    self._flow_token = response['flow_token']

    return subtask
203 def _perform_login(self
, username
, password
):
204 if self
.is_logged_in
:
207 guest_token
= self
._fetch
_guest
_token
(None)
209 **self
._set
_base
_headers
(),
210 'content-type': 'application/json',
211 'x-guest-token': guest_token
,
212 'x-twitter-client-language': 'en',
213 'x-twitter-active-user': 'yes',
214 'Referer': 'https://x.com/',
215 'Origin': 'https://x.com',
218 def build_login_json(*subtask_inputs
):
220 'flow_token': self
._flow
_token
,
221 'subtask_inputs': subtask_inputs
222 }, separators
=(',', ':')).encode()
224 def input_dict(subtask_id
, text
):
226 'subtask_id': subtask_id
,
233 next_subtask
= self
._call
_login
_api
(
234 'Downloading flow token', headers
, query
={'flow_name': 'login'}
, data
=self
._LOGIN
_INIT
_DATA
)
236 while not self
.is_logged_in
:
237 if next_subtask
== 'LoginJsInstrumentationSubtask':
238 next_subtask
= self
._call
_login
_api
(
239 'Submitting JS instrumentation response', headers
, data
=build_login_json({
240 'subtask_id': next_subtask
,
241 'js_instrumentation': {
247 elif next_subtask
== 'LoginEnterUserIdentifierSSO':
248 next_subtask
= self
._call
_login
_api
(
249 'Submitting username', headers
, data
=build_login_json({
250 'subtask_id': next_subtask
,
252 'setting_responses': [{
253 'key': 'user_identifier',
264 elif next_subtask
== 'LoginEnterAlternateIdentifierSubtask':
265 next_subtask
= self
._call
_login
_api
(
266 'Submitting alternate identifier', headers
,
267 data
=build_login_json(input_dict(next_subtask
, self
._get
_tfa
_info
(
268 'one of username, phone number or email that was not used as --username'))))
270 elif next_subtask
== 'LoginEnterPassword':
271 next_subtask
= self
._call
_login
_api
(
272 'Submitting password', headers
, data
=build_login_json({
273 'subtask_id': next_subtask
,
275 'password': password
,
280 elif next_subtask
== 'AccountDuplicationCheck':
281 next_subtask
= self
._call
_login
_api
(
282 'Submitting account duplication check', headers
, data
=build_login_json({
283 'subtask_id': next_subtask
,
284 'check_logged_in_account': {
285 'link': 'AccountDuplicationCheck_false'
289 elif next_subtask
== 'LoginTwoFactorAuthChallenge':
290 next_subtask
= self
._call
_login
_api
(
291 'Submitting 2FA token', headers
, data
=build_login_json(input_dict(
292 next_subtask
, self
._get
_tfa
_info
('two-factor authentication token'))))
294 elif next_subtask
== 'LoginAcid':
295 next_subtask
= self
._call
_login
_api
(
296 'Submitting confirmation code', headers
, data
=build_login_json(input_dict(
297 next_subtask
, self
._get
_tfa
_info
('confirmation code sent to your email or phone'))))
299 elif next_subtask
== 'ArkoseLogin':
300 self
.raise_login_required('Twitter is requiring captcha for this login attempt', method
='cookies')
302 elif next_subtask
== 'DenyLoginSubtask':
303 self
.raise_login_required('Twitter rejected this login attempt as suspicious', method
='cookies')
305 elif next_subtask
== 'LoginSuccessSubtask':
306 raise ExtractorError('Twitter API did not grant auth token cookie')
309 raise ExtractorError(f
'Unrecognized subtask ID "{next_subtask}"')
313 def _call_api(self
, path
, video_id
, query
={}, graphql
=False):
314 headers
= self
._set
_base
_headers
(legacy
=not graphql
and self
._selected
_api
== 'legacy')
316 'x-twitter-auth-type': 'OAuth2Session',
317 'x-twitter-client-language': 'en',
318 'x-twitter-active-user': 'yes',
319 } if self
.is_logged_in
else {
320 'x-guest-token': self
._fetch
_guest
_token
(video_id
)
322 allowed_status
= {400, 401, 403, 404}
if graphql
else {403}
323 result
= self
._download
_json
(
324 (self
._GRAPHQL
_API
_BASE
if graphql
else self
._API
_BASE
) + path
,
325 video_id
, headers
=headers
, query
=query
, expected_status
=allowed_status
,
326 note
=f
'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
328 if result
.get('errors'):
329 errors
= ', '.join(set(traverse_obj(result
, ('errors', ..., 'message', {str}
))))
330 if errors
and 'not authorized' in errors
:
331 self
.raise_login_required(remove_end(errors
, '.'))
332 raise ExtractorError(f
'Error(s) while querying API: {errors or "Unknown error"}')
336 def _build_graphql_query(self
, media_id
):
337 raise NotImplementedError('Method must be implemented to support GraphQL')
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL endpoint and return the ``data`` part of the response.

    The mapping produced by ``_build_graphql_query(media_id)`` is turned
    into query parameters by serialising each value as compact JSON (no
    whitespace after separators). The request goes through ``_call_api``
    with ``graphql=True``.
    """
    variables = self._build_graphql_query(media_id)
    encoded_query = {}
    for name, payload in variables.items():
        encoded_query[name] = json.dumps(payload, separators=(',', ':'))
    response = self._call_api(endpoint, media_id, query=encoded_query, graphql=True)
    return traverse_obj(response, 'data')
345 class TwitterCardIE(InfoExtractor
):
346 IE_NAME
= 'twitter:card'
347 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
350 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
351 # MD5 checksums are different in different places
353 'id': '560070131976392705',
355 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
356 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
357 'uploader': 'Twitter',
358 'uploader_id': 'Twitter',
359 'thumbnail': r
're:^https?://.*\.jpg',
361 'timestamp': 1422366112,
362 'upload_date': '20150127',
364 'comment_count': int,
368 'display_id': '560070183650213889',
369 'uploader_url': 'https://twitter.com/Twitter',
373 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
374 'md5': '7137eca597f72b9abbe61e5ae0161399',
376 'id': '623160978427936768',
378 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
379 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
381 'uploader_id': 'NASA',
382 'timestamp': 1437408129,
383 'upload_date': '20150720',
384 'uploader_url': 'https://twitter.com/NASA',
386 'comment_count': int,
389 'tags': ['PlutoFlyby'],
391 'params': {'format': '[protocol=https]'}
394 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
395 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
399 'title': 'Ubuntu 11.10 Overview',
400 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
401 'upload_date': '20111013',
402 'uploader': 'OMG! UBUNTU!',
403 'uploader_id': 'omgubuntu',
404 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
405 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
406 'channel_follower_count': int,
407 'chapters': 'count:8',
408 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
410 'categories': ['Film & Animation'],
412 'comment_count': int,
413 'availability': 'public',
415 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
418 'channel': 'OMG! UBUNTU!',
419 'playable_in_embed': True,
421 'add_ie': ['Youtube'],
424 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
428 'upload_date': '20151113',
429 'uploader_id': '1189339351084113920',
430 'uploader': 'ArsenalTerje',
431 'title': 'Vine by ArsenalTerje',
432 'timestamp': 1447451307,
433 'alt_title': 'Vine by ArsenalTerje',
434 'comment_count': int,
436 'thumbnail': r
're:^https?://[^?#]+\.jpg',
441 'params': {'skip_download': 'm3u8'}
,
444 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
445 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
447 'id': '705235433198714880',
449 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
450 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
451 'uploader': 'Brent Yarina',
452 'uploader_id': 'BTNBrentYarina',
453 'timestamp': 1456976204,
454 'upload_date': '20160303',
456 'skip': 'This content is no longer available.',
459 'url': 'https://twitter.com/i/videos/752274308186120192',
460 'only_matching': True,
def _real_extract(self, url):
    """Hand a card/video URL over to ``TwitterIE`` via its status URL.

    The numeric id matched from ``url`` is appended to
    ``https://twitter.com/statuses/`` and returned as a URL result keyed
    to ``TwitterIE``, with the same id as the video id.
    """
    tweet_id = self._match_id(url)
    status_url = 'https://twitter.com/statuses/' + tweet_id
    return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
471 class TwitterIE(TwitterBaseIE
):
473 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
476 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
478 'id': '643211870443208704',
479 'display_id': '643211948184596480',
481 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
482 'thumbnail': r
're:^https?://.*\.jpg',
483 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
484 'channel_id': '549749560',
485 'uploader': 'FREE THE NIPPLE',
486 'uploader_id': 'freethenipple',
488 'timestamp': 1442188653,
489 'upload_date': '20150913',
490 'uploader_url': 'https://twitter.com/freethenipple',
491 'comment_count': int,
496 '_old_archive_ids': ['twitter 643211948184596480'],
498 'skip': 'Requires authentication',
500 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
501 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
503 'id': '657991469417025536',
505 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
506 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
507 'thumbnail': r
're:^https?://.*\.png',
509 'uploader_id': 'giphz',
511 'expected_warnings': ['height', 'width'],
512 'skip': 'Account suspended',
514 'url': 'https://twitter.com/starwars/status/665052190608723968',
516 'id': '665052190608723968',
517 'display_id': '665052190608723968',
519 'title': r
're:Star Wars.*A new beginning is coming December 18.*',
520 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
521 'channel_id': '20106852',
522 'uploader_id': 'starwars',
523 'uploader': r
're:Star Wars.*',
524 'timestamp': 1447395772,
525 'upload_date': '20151113',
526 'uploader_url': 'https://twitter.com/starwars',
527 'comment_count': int,
530 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
532 '_old_archive_ids': ['twitter 665052190608723968'],
535 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
537 'id': '705235433198714880',
539 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
540 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
541 'uploader_id': 'BTNBrentYarina',
542 'uploader': 'Brent Yarina',
543 'timestamp': 1456976204,
544 'upload_date': '20160303',
545 'uploader_url': 'https://twitter.com/BTNBrentYarina',
546 'comment_count': int,
553 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
554 # Test case of TwitterCardIE
555 'skip_download': True,
557 'skip': 'Dead external link',
559 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
561 'id': '700207414000242688',
562 'display_id': '700207533655363584',
564 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
565 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
566 'thumbnail': r
're:^https?://.*\.jpg',
567 'channel_id': '1383165541',
568 'uploader': 'jaydin donte geer',
569 'uploader_id': 'jaydingeer',
571 'timestamp': 1455777459,
572 'upload_date': '20160218',
573 'uploader_url': 'https://twitter.com/jaydingeer',
574 'comment_count': int,
577 'tags': ['Damndaniel'],
579 '_old_archive_ids': ['twitter 700207533655363584'],
582 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
583 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
587 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
588 'uploader': 'TAKUMA',
589 'uploader_id': '1004126642786242560',
590 'timestamp': 1402826626,
591 'upload_date': '20140615',
592 'thumbnail': r
're:^https?://.*\.jpg',
593 'alt_title': 'Vine by TAKUMA',
594 'comment_count': int,
601 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
603 'id': '717462543795523584',
604 'display_id': '719944021058060289',
606 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
607 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
608 'channel_id': '701615052',
609 'uploader_id': 'CaptainAmerica',
610 'uploader': 'Captain America',
612 'timestamp': 1460483005,
613 'upload_date': '20160412',
614 'uploader_url': 'https://twitter.com/CaptainAmerica',
615 'thumbnail': r
're:^https?://.*\.jpg',
616 'comment_count': int,
621 '_old_archive_ids': ['twitter 719944021058060289'],
624 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
626 'id': '1zqKVVlkqLaKB',
628 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
629 'upload_date': '20160923',
630 'uploader_id': '1PmKqpJdOJQoY',
631 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
632 'timestamp': 1474613214,
633 'thumbnail': r
're:^https?://.*\.jpg',
635 'add_ie': ['Periscope'],
636 'skip': 'Broadcast not found',
638 # has mp4 formats via mobile API
639 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
641 'id': '852077943283097602',
643 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
644 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
645 'channel_id': '2526757026',
646 'uploader': 'عالم الأخبار',
647 'uploader_id': 'news_al3alm',
649 'timestamp': 1492000653,
650 'upload_date': '20170412',
651 'display_id': '852138619213144067',
653 'uploader_url': 'https://twitter.com/news_al3alm',
654 'thumbnail': r
're:^https?://.*\.jpg',
658 'comment_count': int,
659 '_old_archive_ids': ['twitter 852138619213144067'],
662 'url': 'https://twitter.com/i/web/status/910031516746514432',
664 'id': '910030238373089285',
665 'display_id': '910031516746514432',
667 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
668 'thumbnail': r
're:^https?://.*\.jpg',
669 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
670 'channel_id': '2319432498',
671 'uploader': 'Préfet de Guadeloupe',
672 'uploader_id': 'Prefet971',
674 'timestamp': 1505803395,
675 'upload_date': '20170919',
676 'uploader_url': 'https://twitter.com/Prefet971',
677 'comment_count': int,
682 '_old_archive_ids': ['twitter 910031516746514432'],
685 'skip_download': True, # requires ffmpeg
688 # card via api.twitter.com/1.1/videos/tweet/config
689 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
691 'id': '1001551417340022785',
692 'display_id': '1001551623938805763',
694 'title': 're:.*?Shep is on a roll today.*?',
695 'thumbnail': r
're:^https?://.*\.jpg',
696 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
697 'channel_id': '255036353',
698 'uploader': 'Lis Power',
699 'uploader_id': 'LisPower1',
701 'timestamp': 1527623489,
702 'upload_date': '20180529',
703 'uploader_url': 'https://twitter.com/LisPower1',
704 'comment_count': int,
709 '_old_archive_ids': ['twitter 1001551623938805763'],
712 'skip_download': True, # requires ffmpeg
715 'url': 'https://twitter.com/foobar/status/1087791357756956680',
717 'id': '1087791272830607360',
718 'display_id': '1087791357756956680',
720 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
721 'thumbnail': r
're:^https?://.*\.jpg',
722 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
726 'timestamp': 1548184644,
727 'upload_date': '20190122',
728 'uploader_url': 'https://twitter.com/X',
729 'comment_count': int,
736 'skip': 'This Tweet is unavailable',
738 # not available in Periscope
739 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
741 'id': '1vOGwqejwoWxB',
743 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
745 'uploader_id': '1eVjYOLGkGrQL',
746 'thumbnail': r
're:^https?://.*\.jpg',
747 'tags': ['EduTECH2019'],
750 'add_ie': ['TwitterBroadcast'],
751 'skip': 'Broadcast no longer exists',
754 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
756 'id': '1349774757969989634',
757 'display_id': '1349794411333394432',
759 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
760 'thumbnail': r
're:^https?://.*\.jpg',
761 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
762 'channel_id': '18552281',
763 'uploader': 'Brooklyn Nets',
764 'uploader_id': 'BrooklynNets',
766 'timestamp': 1610651040,
767 'upload_date': '20210114',
768 'uploader_url': 'https://twitter.com/BrooklynNets',
769 'comment_count': int,
774 '_old_archive_ids': ['twitter 1349794411333394432'],
777 'skip_download': True,
780 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
782 'id': '1577855447914409984',
783 'display_id': '1577855540407197696',
785 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
786 'description': 'md5:b9c3699335447391d11753ab21c70a74',
787 'upload_date': '20221006',
788 'channel_id': '143077138',
789 'uploader': 'Oshtru',
790 'uploader_id': 'oshtru',
791 'uploader_url': 'https://twitter.com/oshtru',
792 'thumbnail': r
're:^https?://.*\.jpg',
794 'timestamp': 1665025050,
795 'comment_count': int,
800 '_old_archive_ids': ['twitter 1577855540407197696'],
802 'params': {'skip_download': True}
,
804 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
806 'id': '1577719286659006464',
807 'title': 'Ultima Reload - Test',
808 'description': 'Test https://t.co/Y3KEZD7Dad',
809 'channel_id': '168922496',
810 'uploader': 'Ultima Reload',
811 'uploader_id': 'UltimaShadowX',
812 'uploader_url': 'https://twitter.com/UltimaShadowX',
813 'upload_date': '20221005',
814 'timestamp': 1664992565,
815 'comment_count': int,
822 'params': {'skip_download': True}
,
824 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
826 'id': '1575559336759263233',
827 'display_id': '1575560063510810624',
829 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
830 'thumbnail': r
're:^https?://.*\.jpg',
831 'description': 'md5:95aea692fda36a12081b9629b02daa92',
832 'channel_id': '1094109584',
833 'uploader': 'Max Olson',
834 'uploader_id': 'MesoMax919',
835 'uploader_url': 'https://twitter.com/MesoMax919',
837 'timestamp': 1664477766,
838 'upload_date': '20220929',
839 'comment_count': int,
842 'tags': ['HurricaneIan'],
844 '_old_archive_ids': ['twitter 1575560063510810624'],
847 # Adult content, fails if not logged in
848 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
850 'id': '1575199163847000068',
851 'display_id': '1575199173472927762',
855 'channel_id': '1217167793541480450',
857 'uploader_id': 'Rizdraws',
858 'uploader_url': 'https://twitter.com/Rizdraws',
859 'upload_date': '20220928',
860 'timestamp': 1664391723,
861 'thumbnail': r
're:^https?://.+\.jpg',
864 'comment_count': int,
867 '_old_archive_ids': ['twitter 1575199173472927762'],
869 'params': {'skip_download': 'The media could not be played'}
,
870 'skip': 'Requires authentication',
872 # Playlist result only with graphql API
873 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
874 'playlist_mincount': 2,
876 'id': '1395079556562706435',
879 'channel_id': '21539378',
882 'upload_date': '20210519',
885 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
886 'uploader_id': 'Srirachachau',
887 'comment_count': int,
888 'uploader_url': 'https://twitter.com/Srirachachau',
889 'timestamp': 1621447860,
892 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
893 'playlist_mincount': 2,
895 'id': '1578353380363501568',
897 'channel_id': '2195866214',
898 'uploader_id': 'DavidToons_',
902 'timestamp': 1665143744,
903 'uploader_url': 'https://twitter.com/DavidToons_',
904 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
906 'comment_count': int,
907 'upload_date': '20221007',
911 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
914 'id': '1578401165338976258',
916 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
917 'channel_id': '19338359',
919 'uploader_id': 'primevideouk',
920 'timestamp': 1665155137,
921 'upload_date': '20221007',
923 'uploader_url': 'https://twitter.com/primevideouk',
924 'comment_count': int,
927 'tags': ['TheRingsOfPower'],
931 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
933 'id': '1lPJqmBeeNAJb',
935 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
936 'uploader': r
're:Monique Camarra.+?',
937 'uploader_id': 'MoniqueCamarra',
938 'live_status': 'was_live',
939 'release_timestamp': 1658417414,
940 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
941 'timestamp': 1658407771,
942 'release_date': '20220721',
943 'upload_date': '20220721',
945 'add_ie': ['TwitterSpaces'],
946 'params': {'skip_download': 'm3u8'}
,
947 'skip': 'Requires authentication',
949 # URL specifies video number but --yes-playlist
950 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
951 'playlist_mincount': 2,
953 'id': '1600649710662213632',
954 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
955 'timestamp': 1670459604.0,
956 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
957 'comment_count': int,
958 'uploader_id': 'CTVJLaidlaw',
959 'channel_id': '80082014',
961 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
962 'upload_date': '20221208',
964 'uploader': 'Jocelyn Laidlaw',
965 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
969 # URL specifies video number and --no-playlist
970 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
972 'id': '1600649511827013632',
974 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
975 'thumbnail': r
're:^https?://.+\.jpg',
976 'timestamp': 1670459604.0,
977 'channel_id': '80082014',
978 'uploader_id': 'CTVJLaidlaw',
979 'uploader': 'Jocelyn Laidlaw',
981 'comment_count': int,
982 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
984 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
985 'display_id': '1600649710662213632',
987 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
988 'upload_date': '20221208',
990 '_old_archive_ids': ['twitter 1600649710662213632'],
992 'params': {'noplaylist': True}
,
994 # URL id points to a TweetWithVisibilityResults entity, which wraps the actual Tweet;
995 # note that the extracted id differs from the id in the URL
996 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
998 'id': '1621117577354424321',
999 'display_id': '1621117700482416640',
1001 'title': '뽀 - 아 최우제 이동속도 봐',
1002 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
1004 'channel_id': '1281839411068432384',
1006 'uploader_id': 's2FAKER',
1007 'uploader_url': 'https://twitter.com/s2FAKER',
1008 'upload_date': '20230202',
1009 'timestamp': 1675339553.0,
1010 'thumbnail': r
're:https?://pbs\.twimg\.com/.+',
1014 'repost_count': int,
1015 'comment_count': int,
1016 '_old_archive_ids': ['twitter 1621117700482416640'],
1018 'skip': 'Requires authentication',
1020 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1022 'id': '1599108643743473680',
1023 'display_id': '1599108751385972737',
1025 'title': '\u06ea - \U0001F48B',
1026 'channel_id': '1347791436809441283',
1027 'uploader_url': 'https://twitter.com/hlo_again',
1029 'uploader_id': 'hlo_again',
1030 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1031 'repost_count': int,
1033 'comment_count': int,
1034 'upload_date': '20221203',
1036 'timestamp': 1670092210.0,
1038 'uploader': '\u06ea',
1039 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1040 '_old_archive_ids': ['twitter 1599108751385972737'],
1042 'params': {'noplaylist': True}
,
1044 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1046 'id': '1600009362759733248',
1047 'display_id': '1600009574919962625',
1049 'channel_id': '211814412',
1050 'uploader_url': 'https://twitter.com/MunTheShinobi',
1051 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1052 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1055 'repost_count': int,
1056 'upload_date': '20221206',
1057 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1058 'comment_count': int,
1061 'uploader_id': 'MunTheShinobi',
1062 'duration': 139.987,
1063 'timestamp': 1670306984.0,
1064 '_old_archive_ids': ['twitter 1600009574919962625'],
1067 # retweeted_status (private)
1068 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1070 'id': '1623274794488659969',
1071 'display_id': '1623739803874349067',
1073 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1074 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1075 'uploader': 'Johnny Bullets',
1076 'uploader_id': 'Johnnybull3ts',
1077 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1081 'timestamp': 1675853859.0,
1082 'upload_date': '20230208',
1083 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1085 'repost_count': int,
1087 'skip': 'Protected tweet',
1090 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1092 'id': '1694928337846538240',
1094 'display_id': '1695424220702888009',
1095 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1096 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1097 'channel_id': '15212187',
1098 'uploader': 'Benny Johnson',
1099 'uploader_id': 'bennyjohnson',
1100 'uploader_url': 'https://twitter.com/bennyjohnson',
1104 'timestamp': 1692962814.0,
1105 'upload_date': '20230825',
1106 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1108 'repost_count': int,
1109 'comment_count': int,
1110 '_old_archive_ids': ['twitter 1695424220702888009'],
1113 # retweeted_status w/ legacy API
1114 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1116 'id': '1694928337846538240',
1118 'display_id': '1695424220702888009',
1119 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1120 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1121 'channel_id': '15212187',
1122 'uploader': 'Benny Johnson',
1123 'uploader_id': 'bennyjohnson',
1124 'uploader_url': 'https://twitter.com/bennyjohnson',
1128 'timestamp': 1692962814.0,
1129 'upload_date': '20230825',
1130 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1132 'repost_count': int,
1133 '_old_archive_ids': ['twitter 1695424220702888009'],
1135 'params': {'extractor_args': {'twitter': {'api': ['legacy']}
}},
1137 # Broadcast embedded in tweet
1138 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1140 'id': '1rmxPMjLzAXKN',
1142 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1143 'uploader': 'Jessica Dobson',
1144 'uploader_id': 'JessicaDobsonWX',
1145 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1146 'timestamp': 1701566398,
1147 'upload_date': '20231203',
1148 'live_status': 'was_live',
1149 'thumbnail': r
're:https://[^/]+pscp\.tv/.+\.jpg',
1150 'concurrent_view_count': int,
1153 'add_ie': ['TwitterBroadcast'],
1155 # Animated gif and quote tweet video
1156 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1157 'playlist_mincount': 2,
1159 'id': '1696256659889565950',
1160 'title': 'BAKOON - https://t.co/zom968d0a0',
1161 'description': 'https://t.co/zom968d0a0',
1163 'channel_id': '1263540390',
1164 'uploader': 'BAKOON',
1165 'uploader_id': 'BAKKOOONN',
1166 'uploader_url': 'https://twitter.com/BAKKOOONN',
1168 'timestamp': 1693254077.0,
1169 'upload_date': '20230828',
1171 'comment_count': int,
1172 'repost_count': int,
1174 'skip': 'Requires authentication',
1176 # "stale tweet" with typename "TweetWithVisibilityResults"
1177 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1178 'md5': '511377ff8dfa7545307084dca4dce319',
1180 'id': '1724883339285544960',
1182 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1183 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1184 'display_id': '1724884212803834154',
1185 'channel_id': '337808606',
1186 'uploader': 'Robert F. Kennedy Jr',
1187 'uploader_id': 'RobertKennedyJr',
1188 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1189 'upload_date': '20231115',
1190 'timestamp': 1700079417.0,
1191 'duration': 341.048,
1192 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1193 'tags': ['Kennedy24'],
1194 'repost_count': int,
1196 'comment_count': int,
1198 '_old_archive_ids': ['twitter 1724884212803834154'],
1202 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1203 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1205 'id': '1790637589910654976',
1207 'title': 'Historic Vids - One of the most intense moments in history',
1208 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1209 'display_id': '1790637656616943991',
1210 'uploader': 'Historic Vids',
1211 'uploader_id': 'historyinmemes',
1212 'uploader_url': 'https://twitter.com/historyinmemes',
1213 'channel_id': '855481986290524160',
1214 'upload_date': '20240515',
1215 'timestamp': 1715756260.0,
1218 'comment_count': int,
1219 'repost_count': int,
1221 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1223 '_old_archive_ids': ['twitter 1790637656616943991'],
1227 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1228 'only_matching': True,
1231 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1232 'only_matching': True,
1234 # promo_video_website card
1235 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1236 'only_matching': True,
1238 # promo_video_convo card
1239 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1240 'only_matching': True,
1243 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1244 'only_matching': True,
1246 # video_direct_message card
1247 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1248 'only_matching': True,
1250 # poll2choice_video card
1251 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1252 'only_matching': True,
1254 # poll3choice_video card
1255 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1256 'only_matching': True,
1258 # poll4choice_video card
1259 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1260 'only_matching': True,
# Captures the numeric id that follows a ``..._video/`` path segment; it is
# applied to video variant URLs to recover a media id for syndication results.
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
@property
def _GRAPHQL_ENDPOINT(self):
    """GraphQL operation path used to fetch a tweet.

    Exposed as a property because callers read it as a plain attribute
    (``self._GRAPHQL_ENDPOINT``) rather than calling it.

    Returns the ``TweetDetail`` operation when ``self.is_logged_in`` is
    truthy, otherwise the ``TweetResultByRestId`` operation.
    """
    if self.is_logged_in:
        return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
    return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL tweet payload into the legacy status layout.

    :param data: decoded GraphQL response data for the tweet
    :param twid: id of the requested tweet (used to pick the matching
        ``tweet-<id>`` entry and for warnings)
    :returns: the tweet's ``legacy`` dict, extended with ``user``, ``card``,
        ``quoted_status`` and ``retweeted_status`` keys where available.
        Note that the ``legacy`` dict inside ``data`` is updated in place.
    :raises ExtractorError: when the API returns a tombstone or reports the
        tweet as unavailable
    """
    if self.is_logged_in:
        # Logged-in responses are a threaded conversation; pick the entry of
        # the requested tweet. The result may be wrapped in a 'tweet' key.
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
    elif typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
    # Result for "stale tweet" needs additional transformation
    elif typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    status.update(traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={}))

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        status['retweeted_status']['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # GraphQL lists card bindings as {'key': ..., 'value': ...} items, whereas
    # the card handling code looks them up by key, so re-key them as a dict.
    binding_values = {
        binding_value.get('key'): binding_value.get('value')
        for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
def _build_graphql_query(self, media_id):
    """Build the GraphQL query parameters for fetching a single tweet.

    :param media_id: id of the tweet to request
    :returns: a dict with ``variables`` and ``features`` sections; the
        logged-out variant additionally carries ``fieldToggles``. Which
        variant is returned depends on ``self.is_logged_in`` and is meant to
        pair with the operation chosen by ``_GRAPHQL_ENDPOINT``.
    """
    if self.is_logged_in:
        return {
            'variables': {
                'focalTweetId': media_id,
                'includePromotedContent': True,
                'with_rux_injections': False,
                'withBirdwatchNotes': True,
                'withCommunity': True,
                'withDownvotePerspective': False,
                'withQuickPromoteEligibilityTweetFields': True,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withSuperFollowsTweetFields': True,
                'withSuperFollowsUserFields': True,
                'withV2Timeline': True,
                'withVoice': True,
            },
            'features': {
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_graphql_timeline_navigation_enabled': False,
                'responsive_web_text_conversations_enabled': False,
                'responsive_web_uc_gql_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'tweetypie_unmention_optimization_enabled': True,
                'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
                'verified_phone_label_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    return {
        'variables': {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        },
        'features': {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        },
        'fieldToggles': {
            'withArticleRichContentState': False,
        },
    }
def _call_syndication_api(self, twid):
    """Fetch a tweet from the syndication endpoint and reshape it.

    :param twid: id of the tweet to request
    :returns: the decoded JSON with an added ``extended_entities.media`` list
        so that its structure matches the legacy/graphql results
    :raises ExtractorError: when the endpoint returns an empty JSON response
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={
            'id': twid,
            # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
            'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
        })
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media = []
    for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
        # Recover the media id from a variant URL; fall back to the tweet id
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        media.append(detail)
    status['extended_entities'] = {'media': media}

    return status
def _extract_status(self, twid):
    """Fetch the status (tweet) dict for ``twid`` using the selected API.

    GraphQL is used whenever the session is logged in or ``graphql`` is
    selected; ``legacy`` uses ``statuses/show``. A rate-limit response
    (HTTP 429) from either falls back to the syndication endpoint, and an
    explicit ``syndication`` selection always uses that endpoint's result.

    :param twid: id of the tweet to fetch
    :returns: the ``retweeted_status`` dict if the status has one, otherwise
        the status itself; ``{}`` when neither is a dict
    :raises ExtractorError: for an invalid API selection, or any request
        failure that is not an HTTP 429
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or self._selected_api == 'graphql':
            status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
        elif self._selected_api == 'legacy':
            status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            })
    except ExtractorError as e:
        # Only a rate-limit error is recoverable; everything else propagates
        if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
def _real_extract(self, url):
    """Extract the video(s) attached to a tweet.

    Media comes from the tweet's (and its quoted tweet's)
    ``extended_entities`` and from its card. Depending on what is found and
    on the URL-specified video index, this returns a single info dict, a
    playlist, a URL result for the first expanded link in the tweet, or the
    bare metadata when no video could be found.

    :param url: tweet URL matched by ``_VALID_URL`` (groups ``id``, ``index``)
    :raises ExtractorError: when the URL-specified media index does not
        exist or does not refer to a video
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry extracted from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build an entry from one ``extended_entities`` media object."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })

            # `or {}` also covers an explicit null `sizes` value
            for name, size in (media.get('sizes') or {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card, if there is one."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value(f'{card_name if is_amplify else "player"}_content_id')
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of its quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media in the tweet: hand its first expanded link to other extractors
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1617 class TwitterAmplifyIE(TwitterBaseIE
):
1618 IE_NAME
= 'twitter:amplify'
1619 _VALID_URL
= r
'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
1622 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1623 'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
1625 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1627 'title': 'Twitter Video',
1628 'thumbnail': 're:^https?://.*',
1630 'params': {'format': '[protocol=https]'}
,
def _real_extract(self, url):
    """Extract an Amplify video from the meta tags of its web page.

    The formats come from the VMAP document referenced by the
    ``twitter:amplify:vmap`` meta tag; thumbnail and player dimensions come
    from the ``twitter:image``/``twitter:player`` meta tags.

    :param url: Amplify URL matched by ``_VALID_URL``
    :returns: info dict with ``id``, ``title``, ``formats``, ``thumbnails``
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    vmap_url = self._html_search_meta(
        'twitter:amplify:vmap', webpage, 'vmap url')
    formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

    def _find_dimension(target):
        """Return ``(width, height)`` from the ``twitter:<target>:*`` meta tags."""
        w = int_or_none(self._html_search_meta(
            'twitter:%s:width' % target, webpage, fatal=False))
        h = int_or_none(self._html_search_meta(
            'twitter:%s:height' % target, webpage, fatal=False))
        return w, h

    thumbnails = []
    thumbnail = self._html_search_meta(
        'twitter:image:src', webpage, 'thumbnail', fatal=False)
    if thumbnail:
        thumbnail_w, thumbnail_h = _find_dimension('image')
        thumbnails.append({
            'url': thumbnail,
            'width': thumbnail_w,
            'height': thumbnail_h,
        })

    video_w, video_h = _find_dimension('player')
    # Guard against an empty format list instead of failing with IndexError
    if formats:
        formats[0].update({
            'width': video_w,
            'height': video_h,
        })

    return {
        'id': video_id,
        'title': 'Twitter Video',
        'formats': formats,
        'thumbnails': thumbnails,
    }
1674 class TwitterBroadcastIE(TwitterBaseIE
, PeriscopeBaseIE
):
1675 IE_NAME
= 'twitter:broadcast'
1676 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
1679 # untitled Periscope video
1680 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
1682 'id': '1yNGaQLWpejGj',
1684 'title': 'Andrea May Sahouri - Periscope Broadcast',
1685 'uploader': 'Andrea May Sahouri',
1686 'uploader_id': 'andreamsahouri',
1687 'uploader_url': 'https://twitter.com/andreamsahouri',
1688 'timestamp': 1590973638,
1689 'upload_date': '20200601',
1690 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1694 'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
1696 'id': '1ZkKzeyrPbaxv',
1698 'title': 'Starship | SN10 | High-Altitude Flight Test',
1699 'uploader': 'SpaceX',
1700 'uploader_id': 'SpaceX',
1701 'uploader_url': 'https://twitter.com/SpaceX',
1702 'timestamp': 1614812942,
1703 'upload_date': '20210303',
1704 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1708 'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
1710 'id': '1OyKAVQrgzwGb',
1712 'title': 'Starship Flight Test',
1713 'uploader': 'SpaceX',
1714 'uploader_id': 'SpaceX',
1715 'uploader_url': 'https://twitter.com/SpaceX',
1716 'timestamp': 1681993964,
1717 'upload_date': '20230420',
1718 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
def _real_extract(self, url):
    """Extract a Twitter broadcast.

    :param url: broadcast URL matched by ``_VALID_URL``
    :returns: info dict built by ``_parse_broadcast_data`` with title and
        uploader fields overridden from the broadcast data; ``formats`` is
        only added when the broadcast is not upcoming
    :raises ExtractorError: when the API has no data for the broadcast
    """
    broadcast_id = self._match_id(url)
    broadcast = self._call_api(
        'broadcasts/show.json', broadcast_id,
        {'ids': broadcast_id})['broadcasts'][broadcast_id]
    if not broadcast:
        raise ExtractorError('Broadcast no longer exists', expected=True)

    info = self._parse_broadcast_data(broadcast, broadcast_id)
    info['title'] = broadcast.get('status') or info.get('title')
    info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
    info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
    if info['live_status'] == 'is_upcoming':
        # Nothing to download yet; return the metadata only
        return info

    media_key = broadcast['media_key']
    source = self._call_api(
        f'live_video_stream/status/{media_key}', media_key)['source']
    m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
    if '/live_video_stream/geoblocked/' in m3u8_url:
        self.raise_geo_restricted()

    # The playlist URL's `type` query parameter is used as the m3u8 format id
    m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
        m3u8_url).query).get('type', [None])[0]
    state, width, height = self._extract_common_format_info(broadcast)
    info['formats'] = self._extract_pscp_m3u8_formats(
        m3u8_url, broadcast_id, m3u8_id, state, width, height)
    return info
1751 class TwitterSpacesIE(TwitterBaseIE
):
1752 IE_NAME
= 'twitter:spaces'
1753 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1756 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1758 'id': '1RDxlgyvNXzJL',
1760 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1761 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1762 'uploader': r
're:Lucio Di Gaetano.*?',
1763 'uploader_id': 'luciodigaetano',
1764 'live_status': 'was_live',
1765 'timestamp': 1659877956,
1766 'upload_date': '20220807',
1767 'release_timestamp': 1659904215,
1768 'release_date': '20220807',
1770 'params': {'skip_download': 'm3u8'}
,
1772 # post_live/TimedOut but downloadable
1773 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
1775 'id': '1vAxRAVQWONJl',
1777 'title': 'Framing Up FinOps: Billing Tools',
1778 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
1779 'uploader': 'Google Cloud',
1780 'uploader_id': 'googlecloud',
1781 'live_status': 'post_live',
1782 'timestamp': 1681409554,
1783 'upload_date': '20230413',
1784 'release_timestamp': 1681839000,
1785 'release_date': '20230418',
1787 'params': {'skip_download': 'm3u8'}
,
1789 # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
1790 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
1792 'id': '1eaKbrQbjoRKX',
1795 'description': 'Twitter Space participated by nobody yet',
1796 'uploader': '息根とめる🔪Twitchで復活',
1797 'uploader_id': 'tomeru_ikinone',
1798 'live_status': 'was_live',
1799 'timestamp': 1685617198,
1800 'upload_date': '20230601',
1802 'params': {'skip_download': 'm3u8'}
,
1806 'notstarted': 'is_upcoming',
1807 'ended': 'was_live',
1808 'running': 'is_live',
1809 'timedout': 'post_live',
def _build_graphql_query(self, space_id):
    """Build the GraphQL query parameters for fetching a Twitter Space.

    :param space_id: id of the Space to request
    :returns: a dict with ``variables`` and ``features`` sections
    """
    return {
        'variables': {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        },
        'features': {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        },
    }
def _real_extract(self, url):
    """Extract a Twitter Space (audio only).

    Requires a logged-in session. Formats are only looked up when the Space
    is live or available for replay and has a ``media_key``; otherwise a
    "no formats" condition is reported and the metadata is still returned.

    :param url: Space URL matched by ``_VALID_URL``
    :raises ExtractorError: when the API returns no data for the Space
    """
    space_id = self._match_id(url)
    if not self.is_logged_in:
        self.raise_login_required('Twitter Spaces require authentication')
    space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
    if not space_data:
        raise ExtractorError('Twitter Space not found', expected=True)

    metadata = space_data['metadata']
    # Unknown or missing states leave live_status as None
    live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
    is_live = live_status == 'is_live'

    formats = []
    headers = {'Referer': 'https://twitter.com/'}
    if live_status == 'is_upcoming':
        self.raise_no_formats('Twitter Space not started yet', expected=True)
    elif not is_live and not metadata.get('is_space_available_for_replay'):
        self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
    elif metadata.get('media_key'):
        source = traverse_obj(
            self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
            ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
        formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
            source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
            headers=headers, fatal=False) if source else []
        for fmt in formats:
            fmt.update({'vcodec': 'none', 'acodec': 'aac'})
            if not is_live:
                fmt['container'] = 'm4a_dash'

    participants = ', '.join(traverse_obj(
        space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'

    if not formats and live_status == 'post_live':
        self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

    return {
        'id': space_id,
        'title': metadata.get('title'),
        'description': f'Twitter Space participated by {participants}',
        'uploader': traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'name')),
        'uploader_id': traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
        'live_status': live_status,
        'release_timestamp': try_call(
            lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
        'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
        'formats': formats,
        'http_headers': headers,
    }
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or the internal ``tco:<id>`` form)."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever it points at.

        :param url: ``https://t.co/<id>`` or ``tco:<id>``
        :returns: a URL result for the final URL of the request, with the
            "unsafe link" warning page prefix removed when present
        """
        eid, short_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            # `tco:<id>` carries only the id, so build the real t.co URL
            short_id = eid
            url = self._BASE_URL + short_id

        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading warning-page prefix, keeping the target
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)