5 from .common
import InfoExtractor
6 from .periscope
import PeriscopeBaseIE
, PeriscopeIE
7 from ..compat
import functools
# isort: split
10 compat_urllib_parse_unquote
,
11 compat_urllib_parse_urlparse
,
13 from ..networking
.exceptions
import HTTPError
35 class TwitterBaseIE(InfoExtractor
):
36 _NETRC_MACHINE
= 'twitter'
37 _API_BASE
= 'https://api.twitter.com/1.1/'
38 _GRAPHQL_API_BASE
= 'https://twitter.com/i/api/graphql/'
39 _BASE_REGEX
= r
'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
40 _AUTH
= 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
41 _LEGACY_AUTH
= 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
44 _LOGIN_INIT_DATA
= json
.dumps({
47 'debug_overrides': {},
56 'app_download_cta': 1,
57 'check_logged_in_account': 1,
58 'choice_selection': 3,
59 'contacts_live_sync_permission_prompt': 0,
61 'email_verification': 2,
71 'in_app_notification': 1,
73 'js_instrumentation': 1,
75 'notifications_permission_prompt': 2,
77 'open_home_timeline': 1,
79 'phone_verification': 4,
88 'tweet_selection_urt': 1,
91 'user_recommendations_list': 4,
92 'user_recommendations_urt': 1,
96 }, separators
=(',', ':')).encode()
98 def _extract_variant_formats(self
, variant
, video_id
):
99 variant_url
= variant
.get('url')
102 elif '.m3u8' in variant_url
:
103 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
104 variant_url
, video_id
, 'mp4', 'm3u8_native',
105 m3u8_id
='hls', fatal
=False)
106 for f
in traverse_obj(fmts
, lambda _
, v
: v
['vcodec'] == 'none' and v
.get('tbr') is None):
107 if mobj
:= re
.match(r
'hls-[Aa]udio-(?P<bitrate>\d{4,})', f
['format_id']):
108 f
['tbr'] = int_or_none(mobj
.group('bitrate'), 1000)
111 tbr
= int_or_none(dict_get(variant
, ('bitrate', 'bit_rate')), 1000) or None
114 'format_id': 'http' + ('-%d' % tbr
if tbr
else ''),
117 self
._search
_dimensions
_in
_video
_url
(f
, variant_url
)
120 def _extract_formats_from_vmap_url(self
, vmap_url
, video_id
):
121 vmap_url
= url_or_none(vmap_url
)
124 vmap_data
= self
._download
_xml
(vmap_url
, video_id
)
128 for video_variant
in vmap_data
.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
129 video_variant
.attrib
['url'] = compat_urllib_parse_unquote(
130 video_variant
.attrib
['url'])
131 urls
.append(video_variant
.attrib
['url'])
132 fmts
, subs
= self
._extract
_variant
_formats
(
133 video_variant
.attrib
, video_id
)
135 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
136 video_url
= strip_or_none(xpath_text(vmap_data
, './/MediaFile'))
137 if video_url
not in urls
:
138 fmts
, subs
= self
._extract
_variant
_formats
({'url': video_url}
, video_id
)
140 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
141 return formats
, subtitles
144 def _search_dimensions_in_video_url(a_format
, video_url
):
145 m
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
)
148 'width': int(m
.group('width')),
149 'height': int(m
.group('height')),
def is_logged_in(self):
    """Report whether an ``auth_token`` cookie is stored for the API host.

    The cookies are looked up for ``self._API_BASE``; the result is the
    truthiness of the ``auth_token`` entry (missing entry -> ``False``).

    NOTE(review): other code in this file reads ``self.is_logged_in`` without
    calling it, so this is presumably wrapped by a property decorator that
    sits just above this definition - confirm.
    """
    api_cookies = self._get_cookies(self._API_BASE)
    auth_cookie = api_cookies.get('auth_token')
    return bool(auth_cookie)
156 @functools.cached_property
157 def _selected_api(self
):
158 return self
._configuration
_arg
('api', ['graphql'], ie_key
='Twitter')[0]
160 def _fetch_guest_token(self
, display_id
):
161 guest_token
= traverse_obj(self
._download
_json
(
162 f
'{self._API_BASE}guest/activate.json', display_id
, 'Downloading guest token', data
=b
'',
163 headers
=self
._set
_base
_headers
(legacy
=display_id
and self
._selected
_api
== 'legacy')),
164 ('guest_token', {str}
))
166 raise ExtractorError('Could not retrieve guest token')
169 def _set_base_headers(self
, legacy
=False):
170 bearer_token
= self
._LEGACY
_AUTH
if legacy
and not self
.is_logged_in
else self
._AUTH
172 'Authorization': f
'Bearer {bearer_token}',
173 'x-csrf-token': try_call(lambda: self
._get
_cookies
(self
._API
_BASE
)['ct0'].value
),
176 def _call_login_api(self
, note
, headers
, query
={}, data
=None):
177 response
= self
._download
_json
(
178 f
'{self._API_BASE}onboarding/task.json', None, note
,
179 headers
=headers
, query
=query
, data
=data
, expected_status
=400)
180 error
= traverse_obj(response
, ('errors', 0, 'message', {str}
))
182 raise ExtractorError(f
'Login failed, Twitter API says: {error}', expected
=True)
183 elif traverse_obj(response
, 'status') != 'success':
184 raise ExtractorError('Login was unsuccessful')
186 subtask
= traverse_obj(
187 response
, ('subtasks', ..., 'subtask_id', {str}
), get_all
=False)
189 raise ExtractorError('Twitter API did not return next login subtask')
191 self
._flow
_token
= response
['flow_token']
195 def _perform_login(self
, username
, password
):
196 if self
.is_logged_in
:
199 webpage
= self
._download
_webpage
('https://twitter.com/', None, 'Downloading login page')
200 guest_token
= self
._search
_regex
(
201 r
'\.cookie\s*=\s*["\']gt
=(\d
+);', webpage, 'gt
', default=None) or self._fetch_guest_token(None)
203 **self._set_base_headers(),
204 'content
-type': 'application
/json
',
205 'x
-guest
-token
': guest_token,
206 'x
-twitter
-client
-language
': 'en
',
207 'x
-twitter
-active
-user
': 'yes
',
208 'Referer
': 'https
://twitter
.com
/',
209 'Origin
': 'https
://twitter
.com
',
212 def build_login_json(*subtask_inputs):
214 'flow_token
': self._flow_token,
215 'subtask_inputs
': subtask_inputs
216 }, separators=(',', ':')).encode()
218 def input_dict(subtask_id, text):
220 'subtask_id
': subtask_id,
227 next_subtask = self._call_login_api(
228 'Downloading flow token
', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
230 while not self.is_logged_in:
231 if next_subtask == 'LoginJsInstrumentationSubtask
':
232 next_subtask = self._call_login_api(
233 'Submitting JS instrumentation response
', headers, data=build_login_json({
234 'subtask_id
': next_subtask,
235 'js_instrumentation
': {
241 elif next_subtask == 'LoginEnterUserIdentifierSSO
':
242 next_subtask = self._call_login_api(
243 'Submitting username
', headers, data=build_login_json({
244 'subtask_id
': next_subtask,
246 'setting_responses
': [{
247 'key
': 'user_identifier
',
258 elif next_subtask == 'LoginEnterAlternateIdentifierSubtask
':
259 next_subtask = self._call_login_api(
260 'Submitting alternate identifier
', headers,
261 data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
262 'one of username
, phone number
or email that was
not used
as --username
'))))
264 elif next_subtask == 'LoginEnterPassword
':
265 next_subtask = self._call_login_api(
266 'Submitting password
', headers, data=build_login_json({
267 'subtask_id
': next_subtask,
269 'password
': password,
274 elif next_subtask == 'AccountDuplicationCheck
':
275 next_subtask = self._call_login_api(
276 'Submitting account duplication check
', headers, data=build_login_json({
277 'subtask_id
': next_subtask,
278 'check_logged_in_account
': {
279 'link
': 'AccountDuplicationCheck_false
'
283 elif next_subtask == 'LoginTwoFactorAuthChallenge
':
284 next_subtask = self._call_login_api(
285 'Submitting
2FA token
', headers, data=build_login_json(input_dict(
286 next_subtask, self._get_tfa_info('two
-factor authentication token
'))))
288 elif next_subtask == 'LoginAcid
':
289 next_subtask = self._call_login_api(
290 'Submitting confirmation code
', headers, data=build_login_json(input_dict(
291 next_subtask, self._get_tfa_info('confirmation code sent to your email
or phone
'))))
293 elif next_subtask == 'ArkoseLogin
':
294 self.raise_login_required('Twitter
is requiring captcha
for this login attempt
', method='cookies
')
296 elif next_subtask == 'DenyLoginSubtask
':
297 self.raise_login_required('Twitter rejected this login attempt
as suspicious
', method='cookies
')
299 elif next_subtask == 'LoginSuccessSubtask
':
300 raise ExtractorError('Twitter API did
not grant auth token cookie
')
303 raise ExtractorError(f'Unrecognized subtask ID
"{next_subtask}"')
307 def _call_api(self, path, video_id, query={}, graphql=False):
308 headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy
')
310 'x
-twitter
-auth
-type': 'OAuth2Session
',
311 'x
-twitter
-client
-language
': 'en
',
312 'x
-twitter
-active
-user
': 'yes
',
313 } if self.is_logged_in else {
314 'x
-guest
-token
': self._fetch_guest_token(video_id)
316 allowed_status = {400, 401, 403, 404} if graphql else {403}
317 result = self._download_json(
318 (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
319 video_id, headers=headers, query=query, expected_status=allowed_status,
320 note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON
')
322 if result.get('errors
'):
323 errors = ', '.join(set(traverse_obj(result, ('errors
', ..., 'message
', {str}))))
324 if errors and 'not authorized
' in errors:
325 self.raise_login_required(remove_end(errors, '.'))
326 raise ExtractorError(f'Error(s
) while querying API
: {errors or "Unknown error"}
')
330 def _build_graphql_query(self, media_id):
331 raise NotImplementedError('Method must be implemented to support GraphQL
')
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL ``endpoint`` for ``media_id`` and return its ``data`` member.

    The mapping produced by ``_build_graphql_query`` is turned into URL query
    parameters by serialising every value as compact JSON (no whitespace
    after separators). The request is sent through ``_call_api`` with
    ``graphql=True``, and ``traverse_obj`` picks ``'data'`` out of the response.
    """
    graphql_params = self._build_graphql_query(media_id)
    encoded_query = {}
    for name, payload in graphql_params.items():
        encoded_query[name] = json.dumps(payload, separators=(',', ':'))
    api_response = self._call_api(endpoint, media_id, query=encoded_query, graphql=True)
    return traverse_obj(api_response, 'data')
339 class TwitterCardIE(InfoExtractor):
340 IE_NAME = 'twitter
:card
'
341 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i
/(?
:cards
/tfw
/v1|
videos(?
:/tweet
)?
)/(?P
<id>\d
+)'
344 'url
': 'https
://twitter
.com
/i
/cards
/tfw
/v1
/560070183650213889',
345 # MD5 checksums are different in different places
347 'id': '560070131976392705',
349 'title
': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments
from your perspective
.",
350 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
351 'uploader': 'Twitter',
352 'uploader_id': 'Twitter',
353 'thumbnail': r're:^https?://.*\.jpg',
355 'timestamp': 1422366112,
356 'upload_date': '20150127',
358 'comment_count': int,
362 'display_id': '560070183650213889',
363 'uploader_url': 'https://twitter.com/Twitter',
367 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
368 'md5': '7137eca597f72b9abbe61e5ae0161399',
370 'id': '623160978427936768',
372 'title': "NASA
- Fly over Pluto
's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
373 'description
': "Fly over Pluto's icy Norgay Mountains
and Sputnik Plain
in this
@NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
375 'uploader_id': 'NASA',
376 'timestamp': 1437408129,
377 'upload_date': '20150720',
378 'uploader_url': 'https://twitter.com/NASA',
380 'comment_count': int,
383 'tags': ['PlutoFlyby'],
385 'params': {'format': '[protocol=https]'}
388 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
389 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
393 'title': 'Ubuntu 11.10 Overview',
394 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
395 'upload_date': '20111013',
396 'uploader': 'OMG! UBUNTU!',
397 'uploader_id': 'omgubuntu',
398 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
399 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
400 'channel_follower_count': int,
401 'chapters': 'count:8',
402 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
404 'categories': ['Film & Animation'],
406 'comment_count': int,
407 'availability': 'public',
409 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
412 'channel': 'OMG! UBUNTU!',
413 'playable_in_embed': True,
415 'add_ie': ['Youtube'],
418 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
422 'upload_date': '20151113',
423 'uploader_id': '1189339351084113920',
424 'uploader': 'ArsenalTerje',
425 'title': 'Vine by ArsenalTerje',
426 'timestamp': 1447451307,
427 'alt_title': 'Vine by ArsenalTerje',
428 'comment_count': int,
430 'thumbnail': r
're:^https?://[^?#]+\.jpg',
435 'params': {'skip_download': 'm3u8'}
,
438 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
439 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
441 'id': '705235433198714880',
443 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
444 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
445 'uploader': 'Brent Yarina',
446 'uploader_id': 'BTNBrentYarina',
447 'timestamp': 1456976204,
448 'upload_date': '20160303',
450 'skip': 'This content is no longer available.',
453 'url': 'https://twitter.com/i/videos/752274308186120192',
454 'only_matching': True,
def _real_extract(self, url):
    """Delegate a card/video URL to ``TwitterIE`` via the matching status URL.

    The id matched from ``url`` is appended to
    ``https://twitter.com/statuses/`` and handed to ``url_result`` together
    with ``TwitterIE``'s key and the same id.
    """
    tweet_id = self._match_id(url)
    status_url = 'https://twitter.com/statuses/' + tweet_id
    return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
465 class TwitterIE(TwitterBaseIE
):
467 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
470 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
472 'id': '643211870443208704',
473 'display_id': '643211948184596480',
475 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
476 'thumbnail': r
're:^https?://.*\.jpg',
477 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
478 'channel_id': '549749560',
479 'uploader': 'FREE THE NIPPLE',
480 'uploader_id': 'freethenipple',
482 'timestamp': 1442188653,
483 'upload_date': '20150913',
484 'uploader_url': 'https://twitter.com/freethenipple',
485 'comment_count': int,
490 '_old_archive_ids': ['twitter 643211948184596480'],
492 'skip': 'Requires authentication',
494 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
495 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
497 'id': '657991469417025536',
499 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
500 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
501 'thumbnail': r
're:^https?://.*\.png',
503 'uploader_id': 'giphz',
505 'expected_warnings': ['height', 'width'],
506 'skip': 'Account suspended',
508 'url': 'https://twitter.com/starwars/status/665052190608723968',
510 'id': '665052190608723968',
511 'display_id': '665052190608723968',
513 'title': r
're:Star Wars.*A new beginning is coming December 18.*',
514 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
515 'channel_id': '20106852',
516 'uploader_id': 'starwars',
517 'uploader': r
're:Star Wars.*',
518 'timestamp': 1447395772,
519 'upload_date': '20151113',
520 'uploader_url': 'https://twitter.com/starwars',
521 'comment_count': int,
524 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
526 '_old_archive_ids': ['twitter 665052190608723968'],
529 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
531 'id': '705235433198714880',
533 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
534 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
535 'uploader_id': 'BTNBrentYarina',
536 'uploader': 'Brent Yarina',
537 'timestamp': 1456976204,
538 'upload_date': '20160303',
539 'uploader_url': 'https://twitter.com/BTNBrentYarina',
540 'comment_count': int,
547 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
548 # Test case of TwitterCardIE
549 'skip_download': True,
551 'skip': 'Dead external link',
553 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
555 'id': '700207414000242688',
556 'display_id': '700207533655363584',
558 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
559 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
560 'thumbnail': r
're:^https?://.*\.jpg',
561 'channel_id': '1383165541',
562 'uploader': 'jaydin donte geer',
563 'uploader_id': 'jaydingeer',
565 'timestamp': 1455777459,
566 'upload_date': '20160218',
567 'uploader_url': 'https://twitter.com/jaydingeer',
568 'comment_count': int,
571 'tags': ['Damndaniel'],
573 '_old_archive_ids': ['twitter 700207533655363584'],
576 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
577 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
581 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
582 'uploader': 'TAKUMA',
583 'uploader_id': '1004126642786242560',
584 'timestamp': 1402826626,
585 'upload_date': '20140615',
586 'thumbnail': r
're:^https?://.*\.jpg',
587 'alt_title': 'Vine by TAKUMA',
588 'comment_count': int,
595 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
597 'id': '717462543795523584',
598 'display_id': '719944021058060289',
600 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
601 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
602 'channel_id': '701615052',
603 'uploader_id': 'CaptainAmerica',
604 'uploader': 'Captain America',
606 'timestamp': 1460483005,
607 'upload_date': '20160412',
608 'uploader_url': 'https://twitter.com/CaptainAmerica',
609 'thumbnail': r
're:^https?://.*\.jpg',
610 'comment_count': int,
615 '_old_archive_ids': ['twitter 719944021058060289'],
618 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
620 'id': '1zqKVVlkqLaKB',
622 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
623 'upload_date': '20160923',
624 'uploader_id': '1PmKqpJdOJQoY',
625 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
626 'timestamp': 1474613214,
627 'thumbnail': r
're:^https?://.*\.jpg',
629 'add_ie': ['Periscope'],
630 'skip': 'Broadcast not found',
632 # has mp4 formats via mobile API
633 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
635 'id': '852077943283097602',
637 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
638 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
639 'channel_id': '2526757026',
640 'uploader': 'عالم الأخبار',
641 'uploader_id': 'news_al3alm',
643 'timestamp': 1492000653,
644 'upload_date': '20170412',
645 'display_id': '852138619213144067',
647 'uploader_url': 'https://twitter.com/news_al3alm',
648 'thumbnail': r
're:^https?://.*\.jpg',
652 'comment_count': int,
653 '_old_archive_ids': ['twitter 852138619213144067'],
656 'url': 'https://twitter.com/i/web/status/910031516746514432',
658 'id': '910030238373089285',
659 'display_id': '910031516746514432',
661 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
662 'thumbnail': r
're:^https?://.*\.jpg',
663 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
664 'channel_id': '2319432498',
665 'uploader': 'Préfet de Guadeloupe',
666 'uploader_id': 'Prefet971',
668 'timestamp': 1505803395,
669 'upload_date': '20170919',
670 'uploader_url': 'https://twitter.com/Prefet971',
671 'comment_count': int,
676 '_old_archive_ids': ['twitter 910031516746514432'],
679 'skip_download': True, # requires ffmpeg
682 # card via api.twitter.com/1.1/videos/tweet/config
683 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
685 'id': '1001551417340022785',
686 'display_id': '1001551623938805763',
688 'title': 're:.*?Shep is on a roll today.*?',
689 'thumbnail': r
're:^https?://.*\.jpg',
690 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
691 'channel_id': '255036353',
692 'uploader': 'Lis Power',
693 'uploader_id': 'LisPower1',
695 'timestamp': 1527623489,
696 'upload_date': '20180529',
697 'uploader_url': 'https://twitter.com/LisPower1',
698 'comment_count': int,
703 '_old_archive_ids': ['twitter 1001551623938805763'],
706 'skip_download': True, # requires ffmpeg
709 'url': 'https://twitter.com/foobar/status/1087791357756956680',
711 'id': '1087791272830607360',
712 'display_id': '1087791357756956680',
714 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
715 'thumbnail': r
're:^https?://.*\.jpg',
716 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
720 'timestamp': 1548184644,
721 'upload_date': '20190122',
722 'uploader_url': 'https://twitter.com/X',
723 'comment_count': int,
730 'skip': 'This Tweet is unavailable',
732 # not available in Periscope
733 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
735 'id': '1vOGwqejwoWxB',
737 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
739 'uploader_id': '1eVjYOLGkGrQL',
740 'thumbnail': r
're:^https?://.*\.jpg',
741 'tags': ['EduTECH2019'],
744 'add_ie': ['TwitterBroadcast'],
745 'skip': 'Broadcast no longer exists',
748 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
750 'id': '1349774757969989634',
751 'display_id': '1349794411333394432',
753 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
754 'thumbnail': r
're:^https?://.*\.jpg',
755 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
756 'channel_id': '18552281',
757 'uploader': 'Brooklyn Nets',
758 'uploader_id': 'BrooklynNets',
760 'timestamp': 1610651040,
761 'upload_date': '20210114',
762 'uploader_url': 'https://twitter.com/BrooklynNets',
763 'comment_count': int,
768 '_old_archive_ids': ['twitter 1349794411333394432'],
771 'skip_download': True,
774 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
776 'id': '1577855447914409984',
777 'display_id': '1577855540407197696',
779 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
780 'description': 'md5:b9c3699335447391d11753ab21c70a74',
781 'upload_date': '20221006',
782 'channel_id': '143077138',
783 'uploader': 'Oshtru',
784 'uploader_id': 'oshtru',
785 'uploader_url': 'https://twitter.com/oshtru',
786 'thumbnail': r
're:^https?://.*\.jpg',
788 'timestamp': 1665025050,
789 'comment_count': int,
794 '_old_archive_ids': ['twitter 1577855540407197696'],
796 'params': {'skip_download': True}
,
798 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
800 'id': '1577719286659006464',
801 'title': 'Ultima Reload - Test',
802 'description': 'Test https://t.co/Y3KEZD7Dad',
803 'channel_id': '168922496',
804 'uploader': 'Ultima Reload',
805 'uploader_id': 'UltimaShadowX',
806 'uploader_url': 'https://twitter.com/UltimaShadowX',
807 'upload_date': '20221005',
808 'timestamp': 1664992565,
809 'comment_count': int,
816 'params': {'skip_download': True}
,
818 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
820 'id': '1575559336759263233',
821 'display_id': '1575560063510810624',
823 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
824 'thumbnail': r
're:^https?://.*\.jpg',
825 'description': 'md5:95aea692fda36a12081b9629b02daa92',
826 'channel_id': '1094109584',
827 'uploader': 'Max Olson',
828 'uploader_id': 'MesoMax919',
829 'uploader_url': 'https://twitter.com/MesoMax919',
831 'timestamp': 1664477766,
832 'upload_date': '20220929',
833 'comment_count': int,
836 'tags': ['HurricaneIan'],
838 '_old_archive_ids': ['twitter 1575560063510810624'],
841 # Adult content, fails if not logged in
842 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
844 'id': '1575199163847000068',
845 'display_id': '1575199173472927762',
849 'channel_id': '1217167793541480450',
851 'uploader_id': 'Rizdraws',
852 'uploader_url': 'https://twitter.com/Rizdraws',
853 'upload_date': '20220928',
854 'timestamp': 1664391723,
855 'thumbnail': r
're:^https?://.+\.jpg',
858 'comment_count': int,
861 '_old_archive_ids': ['twitter 1575199173472927762'],
863 'params': {'skip_download': 'The media could not be played'}
,
864 'skip': 'Requires authentication',
866 # Playlist result only with graphql API
867 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
868 'playlist_mincount': 2,
870 'id': '1395079556562706435',
873 'channel_id': '21539378',
876 'upload_date': '20210519',
879 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
880 'uploader_id': 'Srirachachau',
881 'comment_count': int,
882 'uploader_url': 'https://twitter.com/Srirachachau',
883 'timestamp': 1621447860,
886 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
887 'playlist_mincount': 2,
889 'id': '1578353380363501568',
891 'channel_id': '2195866214',
892 'uploader_id': 'DavidToons_',
896 'timestamp': 1665143744,
897 'uploader_url': 'https://twitter.com/DavidToons_',
898 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
900 'comment_count': int,
901 'upload_date': '20221007',
905 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
908 'id': '1578401165338976258',
910 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
911 'channel_id': '19338359',
913 'uploader_id': 'primevideouk',
914 'timestamp': 1665155137,
915 'upload_date': '20221007',
917 'uploader_url': 'https://twitter.com/primevideouk',
918 'comment_count': int,
921 'tags': ['TheRingsOfPower'],
925 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
927 'id': '1lPJqmBeeNAJb',
929 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
930 'uploader': r
're:Monique Camarra.+?',
931 'uploader_id': 'MoniqueCamarra',
932 'live_status': 'was_live',
933 'release_timestamp': 1658417414,
934 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
935 'timestamp': 1658407771,
936 'release_date': '20220721',
937 'upload_date': '20220721',
939 'add_ie': ['TwitterSpaces'],
940 'params': {'skip_download': 'm3u8'}
,
941 'skip': 'Requires authentication',
943 # URL specifies video number but --yes-playlist
944 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
945 'playlist_mincount': 2,
947 'id': '1600649710662213632',
948 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
949 'timestamp': 1670459604.0,
950 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
951 'comment_count': int,
952 'uploader_id': 'CTVJLaidlaw',
953 'channel_id': '80082014',
955 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
956 'upload_date': '20221208',
958 'uploader': 'Jocelyn Laidlaw',
959 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
963 # URL specifies video number and --no-playlist
964 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
966 'id': '1600649511827013632',
968 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
969 'thumbnail': r
're:^https?://.+\.jpg',
970 'timestamp': 1670459604.0,
971 'channel_id': '80082014',
972 'uploader_id': 'CTVJLaidlaw',
973 'uploader': 'Jocelyn Laidlaw',
975 'comment_count': int,
976 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
978 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
979 'display_id': '1600649710662213632',
981 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
982 'upload_date': '20221208',
984 '_old_archive_ids': ['twitter 1600649710662213632'],
986 'params': {'noplaylist': True}
,
# id pointing to a TweetWithVisibilityResults type entity which wraps the actual Tweet
# note that the id differs between extraction and url
990 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
992 'id': '1621117577354424321',
993 'display_id': '1621117700482416640',
995 'title': '뽀 - 아 최우제 이동속도 봐',
996 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
998 'channel_id': '1281839411068432384',
1000 'uploader_id': 's2FAKER',
1001 'uploader_url': 'https://twitter.com/s2FAKER',
1002 'upload_date': '20230202',
1003 'timestamp': 1675339553.0,
1004 'thumbnail': r
're:https?://pbs\.twimg\.com/.+',
1008 'repost_count': int,
1009 'comment_count': int,
1010 '_old_archive_ids': ['twitter 1621117700482416640'],
1012 'skip': 'Requires authentication',
1014 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
1016 'id': '1599108643743473680',
1017 'display_id': '1599108751385972737',
1019 'title': '\u06ea - \U0001F48B',
1020 'channel_id': '1347791436809441283',
1021 'uploader_url': 'https://twitter.com/hlo_again',
1023 'uploader_id': 'hlo_again',
1024 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1025 'repost_count': int,
1027 'comment_count': int,
1028 'upload_date': '20221203',
1030 'timestamp': 1670092210.0,
1032 'uploader': '\u06ea',
1033 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1034 '_old_archive_ids': ['twitter 1599108751385972737'],
1036 'params': {'noplaylist': True}
,
1038 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1040 'id': '1600009362759733248',
1041 'display_id': '1600009574919962625',
1043 'channel_id': '211814412',
1044 'uploader_url': 'https://twitter.com/MunTheShinobi',
1045 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1046 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1049 'repost_count': int,
1050 'upload_date': '20221206',
1051 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1052 'comment_count': int,
1055 'uploader_id': 'MunTheShinobi',
1056 'duration': 139.987,
1057 'timestamp': 1670306984.0,
1058 '_old_archive_ids': ['twitter 1600009574919962625'],
1061 # retweeted_status (private)
1062 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1064 'id': '1623274794488659969',
1065 'display_id': '1623739803874349067',
1067 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1068 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1069 'uploader': 'Johnny Bullets',
1070 'uploader_id': 'Johnnybull3ts',
1071 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1075 'timestamp': 1675853859.0,
1076 'upload_date': '20230208',
1077 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1079 'repost_count': int,
1081 'skip': 'Protected tweet',
1084 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1086 'id': '1694928337846538240',
1088 'display_id': '1695424220702888009',
1089 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1090 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1091 'channel_id': '15212187',
1092 'uploader': 'Benny Johnson',
1093 'uploader_id': 'bennyjohnson',
1094 'uploader_url': 'https://twitter.com/bennyjohnson',
1098 'timestamp': 1692962814.0,
1099 'upload_date': '20230825',
1100 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1102 'repost_count': int,
1103 'comment_count': int,
1104 '_old_archive_ids': ['twitter 1695424220702888009'],
1107 # retweeted_status w/ legacy API
1108 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1110 'id': '1694928337846538240',
1112 'display_id': '1695424220702888009',
1113 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1114 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1115 'channel_id': '15212187',
1116 'uploader': 'Benny Johnson',
1117 'uploader_id': 'bennyjohnson',
1118 'uploader_url': 'https://twitter.com/bennyjohnson',
1122 'timestamp': 1692962814.0,
1123 'upload_date': '20230825',
1124 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1126 'repost_count': int,
1127 '_old_archive_ids': ['twitter 1695424220702888009'],
1129 'params': {'extractor_args': {'twitter': {'api': ['legacy']}
}},
1131 # Broadcast embedded in tweet
1132 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1134 'id': '1rmxPMjLzAXKN',
1136 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1137 'uploader': 'Jessica Dobson',
1138 'uploader_id': 'JessicaDobsonWX',
1139 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1140 'timestamp': 1701566398,
1141 'upload_date': '20231203',
1142 'live_status': 'was_live',
1143 'thumbnail': r
're:https://[^/]+pscp\.tv/.+\.jpg',
1144 'concurrent_view_count': int,
1147 'add_ie': ['TwitterBroadcast'],
1149 # Animated gif and quote tweet video
1150 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1151 'playlist_mincount': 2,
1153 'id': '1696256659889565950',
1154 'title': 'BAKOON - https://t.co/zom968d0a0',
1155 'description': 'https://t.co/zom968d0a0',
1157 'channel_id': '1263540390',
1158 'uploader': 'BAKOON',
1159 'uploader_id': 'BAKKOOONN',
1160 'uploader_url': 'https://twitter.com/BAKKOOONN',
1162 'timestamp': 1693254077.0,
1163 'upload_date': '20230828',
1165 'comment_count': int,
1166 'repost_count': int,
1168 'skip': 'Requires authentication',
1170 # "stale tweet" with typename "TweetWithVisibilityResults"
1171 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1172 'md5': '511377ff8dfa7545307084dca4dce319',
1174 'id': '1724883339285544960',
1176 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1177 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1178 'display_id': '1724884212803834154',
1179 'channel_id': '337808606',
1180 'uploader': 'Robert F. Kennedy Jr',
1181 'uploader_id': 'RobertKennedyJr',
1182 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1183 'upload_date': '20231115',
1184 'timestamp': 1700079417.0,
1185 'duration': 341.048,
1186 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1187 'tags': ['Kennedy24'],
1188 'repost_count': int,
1190 'comment_count': int,
1192 '_old_archive_ids': ['twitter 1724884212803834154'],
1196 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1197 'only_matching': True,
1200 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1201 'only_matching': True,
1203 # promo_video_website card
1204 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1205 'only_matching': True,
1207 # promo_video_convo card
1208 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1209 'only_matching': True,
1212 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1213 'only_matching': True,
1215 # video_direct_message card
1216 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1217 'only_matching': True,
1219 # poll2choice_video card
1220 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1221 'only_matching': True,
1223 # poll3choice_video card
1224 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1225 'only_matching': True,
1227 # poll4choice_video card
1228 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1229 'only_matching': True,
# Captures the digits of a `_video/<digits>/` path segment; `_call_syndication_api`
# applies it to video variant URLs to derive a media id.
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')

@property
def _GRAPHQL_ENDPOINT(self):
    """Return the GraphQL query path used to look up a single tweet.

    This is a property because `_extract_status` reads it as a plain
    attribute (`self._GRAPHQL_ENDPOINT`) and passes the value straight to
    the GraphQL API call. A different query is used depending on whether
    the session is logged in.
    """
    if self.is_logged_in:
        return 'zZXycP0V6H7m-2r0mOnFcA/TweetDetail'
    return '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId'
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into the legacy status layout.

    :param data: decoded GraphQL response body
    :param twid: id of the requested tweet (used to pick the matching
        timeline entry and for warnings)
    :returns: the tweet's `legacy` dict with `user`, `card`,
        `quoted_status` and `retweeted_status` attached where present
    :raises ExtractorError: when the API returns a tombstone or reports the
        tweet as unavailable (login-required variants go through
        `raise_login_required`)
    """
    if self.is_logged_in:
        # Logged-in responses are a conversation timeline: pick the entry for this tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
    elif typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)
    # Result for "stale tweet" needs additional transformation
    elif typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    status.update(traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={}))

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        status['retweeted_status']['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # GraphQL lists card bindings as [{'key': ..., 'value': ...}]; the card
    # extraction code looks them up by key, so turn the list into a mapping.
    binding_values = {
        binding_value.get('key'): binding_value.get('value')
        for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    # Only write back when something was found; `status` may have no 'card' at all
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for looking up one tweet.

    :param media_id: id of the tweet to request
    :returns: dict of query sections; the logged-in payload addresses the
        tweet as `focalTweetId`, the guest payload as `tweetId` and also
        carries a `fieldToggles` section
    """
    if self.is_logged_in:
        return {
            'variables': {
                'focalTweetId': media_id,
                'includePromotedContent': True,
                'with_rux_injections': False,
                'withBirdwatchNotes': True,
                'withCommunity': True,
                'withDownvotePerspective': False,
                'withQuickPromoteEligibilityTweetFields': True,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withSuperFollowsTweetFields': True,
                'withSuperFollowsUserFields': True,
                'withV2Timeline': True,
                'withVoice': True,
            },
            'features': {
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_graphql_timeline_navigation_enabled': False,
                'responsive_web_text_conversations_enabled': False,
                'responsive_web_uc_gql_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'tweetypie_unmention_optimization_enabled': True,
                'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
                'verified_phone_label_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    return {
        'variables': {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        },
        'features': {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False,
        },
        'fieldToggles': {
            'withArticleRichContentState': False,
        },
    }
def _call_syndication_api(self, twid):
    """Fetch a tweet from the syndication endpoint and reshape it.

    Emits a one-time warning that this endpoint returns reduced data, then
    adds an `extended_entities.media` list so the result can be read like
    a legacy/GraphQL status.

    :param twid: id of the tweet to request
    :returns: the decoded syndication JSON with `extended_entities` added
    :raises ExtractorError: if the endpoint returns an empty response
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={
            'id': twid,
            # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
            'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
        })
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media = []
    for detail in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
        # Take the media id from the first variant URL that carries one; otherwise reuse the tweet id
        detail['id_str'] = traverse_obj(detail, (
            'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False) or twid
        media.append(detail)
    status['extended_entities'] = {'media': media}

    return status
def _extract_status(self, twid):
    """Download the status for `twid` using the selected API.

    A logged-in session always goes through GraphQL. On an HTTP 429 from
    GraphQL or the legacy API the syndication endpoint is used instead.
    If the tweet is a retweet, the retweeted status is returned.

    :param twid: id of the tweet to request
    :returns: status dict (empty dict when nothing usable was returned)
    :raises ExtractorError: for an unknown API selection, or any API
        failure other than rate limiting
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or self._selected_api == 'graphql':
            status = self._graphql_to_legacy(self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid), twid)
        elif self._selected_api == 'legacy':
            status = self._call_api(f'statuses/show/{twid}.json', twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            })
    except ExtractorError as e:
        # Only rate limiting is recoverable here; everything else propagates
        if not isinstance(e.cause, HTTPError) or e.cause.status != 429:
            raise
        self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
        status = self._call_syndication_api(twid)

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)

    # Prefer the retweeted status when there is one, else the status itself
    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
def _real_extract(self, url):
    """Extract the video(s) attached to a tweet.

    Media is collected from the tweet (and its quoted tweet) and from the
    tweet's card. Depending on the playlist decision and the number of
    results this returns a single info dict, a playlist, or a URL result
    pointing at the tweet's first expanded link.

    :param url: tweet URL, optionally carrying a video index
    :raises ExtractorError: when a URL-specified index does not refer to
        an available video
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry produced from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build an entry (formats, subtitles, thumbnails) from one media object."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card; yields nothing without a card."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding stores its payload under '<type>_value', e.g. 'string_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself followed by that of the quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # The URL index is 1-based and counts every media item, photos included
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # No media and no usable card: fall back to the first link in the tweet
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
1586 class TwitterAmplifyIE(TwitterBaseIE
):
1587 IE_NAME
= 'twitter:amplify'
1588 _VALID_URL
= r
'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'
1591 'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1592 'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
1594 'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
1596 'title': 'Twitter Video',
1597 'thumbnail': 're:^https?://.*',
1599 'params': {'format': '[protocol=https]'}
,
def _real_extract(self, url):
    """Extract a video from an amp.twimg.com page.

    The VMAP URL, thumbnail and dimensions are all read from the page's
    `twitter:*` meta tags.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)

    vmap_url = self._html_search_meta(
        'twitter:amplify:vmap', webpage, 'vmap url')
    formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

    def _find_dimension(target):
        """Return (width, height) from the `twitter:<target>:*` meta tags."""
        w = int_or_none(self._html_search_meta(
            f'twitter:{target}:width', webpage, fatal=False))
        h = int_or_none(self._html_search_meta(
            f'twitter:{target}:height', webpage, fatal=False))
        return w, h

    thumbnails = []
    thumbnail = self._html_search_meta(
        'twitter:image:src', webpage, 'thumbnail', fatal=False)
    if thumbnail:
        thumbnail_w, thumbnail_h = _find_dimension('image')
        thumbnails.append({
            'url': thumbnail,
            'width': thumbnail_w,
            'height': thumbnail_h,
        })

    # The player dimensions are applied to the first format only; skip when
    # no format was found instead of failing with IndexError.
    if formats:
        video_w, video_h = _find_dimension('player')
        formats[0].update({
            'width': video_w,
            'height': video_h,
        })

    return {
        'id': video_id,
        'title': 'Twitter Video',
        'formats': formats,
        'thumbnails': thumbnails,
    }
1643 class TwitterBroadcastIE(TwitterBaseIE
, PeriscopeBaseIE
):
1644 IE_NAME
= 'twitter:broadcast'
1645 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
1648 # untitled Periscope video
1649 'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
1651 'id': '1yNGaQLWpejGj',
1653 'title': 'Andrea May Sahouri - Periscope Broadcast',
1654 'uploader': 'Andrea May Sahouri',
1655 'uploader_id': 'andreamsahouri',
1656 'uploader_url': 'https://twitter.com/andreamsahouri',
1657 'timestamp': 1590973638,
1658 'upload_date': '20200601',
1659 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1663 'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
1665 'id': '1ZkKzeyrPbaxv',
1667 'title': 'Starship | SN10 | High-Altitude Flight Test',
1668 'uploader': 'SpaceX',
1669 'uploader_id': 'SpaceX',
1670 'uploader_url': 'https://twitter.com/SpaceX',
1671 'timestamp': 1614812942,
1672 'upload_date': '20210303',
1673 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
1677 'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
1679 'id': '1OyKAVQrgzwGb',
1681 'title': 'Starship Flight Test',
1682 'uploader': 'SpaceX',
1683 'uploader_id': 'SpaceX',
1684 'uploader_url': 'https://twitter.com/SpaceX',
1685 'timestamp': 1681993964,
1686 'upload_date': '20230420',
1687 'thumbnail': r
're:^https?://[^?#]+\.jpg\?token=',
def _real_extract(self, url):
    """Extract a Twitter broadcast.

    Metadata comes from `broadcasts/show.json`; the stream URL comes from
    `live_video_stream/status/<media_key>`. Upcoming broadcasts are
    returned without formats.

    :raises ExtractorError: when the API has no entry for the broadcast
    """
    broadcast_id = self._match_id(url)
    # A missing key is treated like an empty entry so both cases give the same expected error
    broadcast = traverse_obj(self._call_api(
        'broadcasts/show.json', broadcast_id, {'ids': broadcast_id}),
        ('broadcasts', broadcast_id, {dict}))
    if not broadcast:
        raise ExtractorError('Broadcast no longer exists', expected=True)

    info = self._parse_broadcast_data(broadcast, broadcast_id)
    # Prefer the Twitter-side values and keep the parsed ones as fallback
    info['title'] = broadcast.get('status') or info.get('title')
    info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
    info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
    if info['live_status'] == 'is_upcoming':
        return info

    media_key = broadcast['media_key']
    source = self._call_api(
        f'live_video_stream/status/{media_key}', media_key)['source']
    m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
    if '/live_video_stream/geoblocked/' in m3u8_url:
        self.raise_geo_restricted()
    # The playlist URL's `type` query parameter is used as the m3u8 format id
    m3u8_id = compat_parse_qs(compat_urllib_parse_urlparse(
        m3u8_url).query).get('type', [None])[0]
    state, width, height = self._extract_common_format_info(broadcast)
    info['formats'] = self._extract_pscp_m3u8_formats(
        m3u8_url, broadcast_id, m3u8_id, state, width, height)
    return info
1720 class TwitterSpacesIE(TwitterBaseIE
):
1721 IE_NAME
= 'twitter:spaces'
1722 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1725 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1727 'id': '1RDxlgyvNXzJL',
1729 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1730 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1731 'uploader': r
're:Lucio Di Gaetano.*?',
1732 'uploader_id': 'luciodigaetano',
1733 'live_status': 'was_live',
1734 'timestamp': 1659877956,
1735 'upload_date': '20220807',
1736 'release_timestamp': 1659904215,
1737 'release_date': '20220807',
1739 'params': {'skip_download': 'm3u8'}
,
1741 # post_live/TimedOut but downloadable
1742 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
1744 'id': '1vAxRAVQWONJl',
1746 'title': 'Framing Up FinOps: Billing Tools',
1747 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
1748 'uploader': 'Google Cloud',
1749 'uploader_id': 'googlecloud',
1750 'live_status': 'post_live',
1751 'timestamp': 1681409554,
1752 'upload_date': '20230413',
1753 'release_timestamp': 1681839000,
1754 'release_date': '20230418',
1756 'params': {'skip_download': 'm3u8'}
,
1758 # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
1759 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
1761 'id': '1eaKbrQbjoRKX',
1764 'description': 'Twitter Space participated by nobody yet',
1765 'uploader': '息根とめる🔪Twitchで復活',
1766 'uploader_id': 'tomeru_ikinone',
1767 'live_status': 'was_live',
1768 'timestamp': 1685617198,
1769 'upload_date': '20230601',
1771 'params': {'skip_download': 'm3u8'}
,
# Maps the lower-cased `state` field of a Space's metadata to a live_status value;
# `_real_extract` looks states up here via `self.SPACE_STATUS[...]`.
SPACE_STATUS = {
    'notstarted': 'is_upcoming',
    'ended': 'was_live',
    'running': 'is_live',
    'timedout': 'post_live',
}
def _build_graphql_query(self, space_id):
    """Build the GraphQL query payload for looking up one Space.

    :param space_id: id of the Space to request
    :returns: dict with the `variables` and `features` query sections
    """
    variables = {
        'id': space_id,
        'isMetatagsQuery': True,
        'withDownvotePerspective': False,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withReplays': True,
        'withSuperFollowsUserFields': True,
        'withSuperFollowsTweetFields': True,
    }
    features = {
        'dont_mention_me_view_api_enabled': True,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_uc_gql_enabled': True,
        'spaces_2022_h2_clipping': True,
        'spaces_2022_h2_spaces_communities': False,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'vibe_api_enabled': True,
    }
    return {
        'variables': variables,
        'features': features,
    }
def _real_extract(self, url):
    """Extract a Twitter Space (audio only).

    Requires a logged-in session. Spaces that have not started, have
    replay disabled, or are not downloadable yet are reported through
    `raise_no_formats`; the metadata is still returned when that call
    does not raise.

    :raises ExtractorError: when the API returns no data for the Space
    """
    space_id = self._match_id(url)
    if not self.is_logged_in:
        self.raise_login_required('Twitter Spaces require authentication')
    space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
    if not space_data:
        raise ExtractorError('Twitter Space not found', expected=True)

    metadata = space_data['metadata']
    # A missing or unrecognised state leaves live_status as None
    live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
    is_live = live_status == 'is_live'

    formats = []
    headers = {'Referer': 'https://twitter.com/'}
    if live_status == 'is_upcoming':
        self.raise_no_formats('Twitter Space not started yet', expected=True)
    elif not is_live and not metadata.get('is_space_available_for_replay'):
        self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
    elif metadata.get('media_key'):
        source = traverse_obj(
            self._call_api(f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key']),
            ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
        formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
            source, metadata['media_key'], 'm4a', entry_protocol='m3u8', live=is_live,
            headers=headers, fatal=False) if source else []
        for fmt in formats:
            fmt.update({'vcodec': 'none', 'acodec': 'aac'})
            if not is_live:
                fmt['container'] = 'm4a_dash'

    participants = ', '.join(traverse_obj(
        space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'

    if not formats and live_status == 'post_live':
        self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

    return {
        'id': space_id,
        'title': metadata.get('title'),
        'description': f'Twitter Space participated by {participants}',
        'uploader': traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'name')),
        'uploader_id': traverse_obj(
            metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
        'live_status': live_status,
        'release_timestamp': try_call(
            lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
        'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
        'formats': formats,
        'http_headers': headers,
    }
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve t.co short links (or `tco:<id>` pseudo-URLs) to their target URL."""

    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and hand the final URL to another extractor.

        If the redirect ends on Twitter's unsafe-link warning page, the
        wrapped target is taken from that page's URL instead.
        """
        eid, short_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            # `tco:<id>` form: build the real t.co URL from the id
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the 'curl' User-Agent presumably makes t.co answer with a plain HTTP redirect - confirm
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading prefix; the target URL itself is left untouched
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)