5 from .common
import InfoExtractor
6 from .periscope
import PeriscopeBaseIE
, PeriscopeIE
9 compat_urllib_parse_unquote
,
10 compat_urllib_parse_urlparse
,
33 class TwitterBaseIE(InfoExtractor
):
34 _NETRC_MACHINE
= 'twitter'
35 _API_BASE
= 'https://api.twitter.com/1.1/'
36 _GRAPHQL_API_BASE
= 'https://twitter.com/i/api/graphql/'
37 _BASE_REGEX
= r
'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
38 _AUTH
= 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
39 _LEGACY_AUTH
= 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
42 _LOGIN_INIT_DATA
= json
.dumps({
45 'debug_overrides': {},
54 'app_download_cta': 1,
55 'check_logged_in_account': 1,
56 'choice_selection': 3,
57 'contacts_live_sync_permission_prompt': 0,
59 'email_verification': 2,
69 'in_app_notification': 1,
71 'js_instrumentation': 1,
73 'notifications_permission_prompt': 2,
75 'open_home_timeline': 1,
77 'phone_verification': 4,
86 'tweet_selection_urt': 1,
89 'user_recommendations_list': 4,
90 'user_recommendations_urt': 1,
94 }, separators
=(',', ':')).encode()
96 def _extract_variant_formats(self
, variant
, video_id
):
97 variant_url
= variant
.get('url')
100 elif '.m3u8' in variant_url
:
101 return self
._extract
_m
3u8_formats
_and
_subtitles
(
102 variant_url
, video_id
, 'mp4', 'm3u8_native',
103 m3u8_id
='hls', fatal
=False)
105 tbr
= int_or_none(dict_get(variant
, ('bitrate', 'bit_rate')), 1000) or None
108 'format_id': 'http' + ('-%d' % tbr
if tbr
else ''),
111 self
._search
_dimensions
_in
_video
_url
(f
, variant_url
)
114 def _extract_formats_from_vmap_url(self
, vmap_url
, video_id
):
115 vmap_url
= url_or_none(vmap_url
)
118 vmap_data
= self
._download
_xml
(vmap_url
, video_id
)
122 for video_variant
in vmap_data
.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
123 video_variant
.attrib
['url'] = compat_urllib_parse_unquote(
124 video_variant
.attrib
['url'])
125 urls
.append(video_variant
.attrib
['url'])
126 fmts
, subs
= self
._extract
_variant
_formats
(
127 video_variant
.attrib
, video_id
)
129 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
130 video_url
= strip_or_none(xpath_text(vmap_data
, './/MediaFile'))
131 if video_url
not in urls
:
132 fmts
, subs
= self
._extract
_variant
_formats
({'url': video_url}
, video_id
)
134 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
135 return formats
, subtitles
138 def _search_dimensions_in_video_url(a_format
, video_url
):
139 m
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
)
142 'width': int(m
.group('width')),
143 'height': int(m
.group('height')),
def is_logged_in(self):
    """Report whether an ``auth_token`` cookie exists for the API base URL.

    Looks up the cookies associated with ``self._API_BASE`` and returns
    ``True`` when an ``auth_token`` entry is present and truthy, ``False``
    otherwise.
    """
    api_cookies = self._get_cookies(self._API_BASE)
    auth_cookie = api_cookies.get('auth_token')
    return bool(auth_cookie)
150 def _fetch_guest_token(self
, display_id
):
151 guest_token
= traverse_obj(self
._download
_json
(
152 f
'{self._API_BASE}guest/activate.json', display_id
, 'Downloading guest token', data
=b
'',
153 headers
=self
._set
_base
_headers
(legacy
=display_id
and self
._configuration
_arg
('legacy_api'))),
154 ('guest_token', {str}
))
156 raise ExtractorError('Could not retrieve guest token')
159 def _set_base_headers(self
, legacy
=False):
160 bearer_token
= self
._LEGACY
_AUTH
if legacy
and not self
.is_logged_in
else self
._AUTH
162 'Authorization': f
'Bearer {bearer_token}',
163 'x-csrf-token': try_call(lambda: self
._get
_cookies
(self
._API
_BASE
)['ct0'].value
),
166 def _call_login_api(self
, note
, headers
, query
={}, data
=None):
167 response
= self
._download
_json
(
168 f
'{self._API_BASE}onboarding/task.json', None, note
,
169 headers
=headers
, query
=query
, data
=data
, expected_status
=400)
170 error
= traverse_obj(response
, ('errors', 0, 'message', {str}
))
172 raise ExtractorError(f
'Login failed, Twitter API says: {error}', expected
=True)
173 elif traverse_obj(response
, 'status') != 'success':
174 raise ExtractorError('Login was unsuccessful')
176 subtask
= traverse_obj(
177 response
, ('subtasks', ..., 'subtask_id', {str}
), get_all
=False)
179 raise ExtractorError('Twitter API did not return next login subtask')
181 self
._flow
_token
= response
['flow_token']
185 def _perform_login(self
, username
, password
):
186 if self
.is_logged_in
:
189 webpage
= self
._download
_webpage
('https://twitter.com/', None, 'Downloading login page')
190 guest_token
= self
._search
_regex
(
191 r
'\.cookie\s*=\s*["\']gt
=(\d
+);', webpage, 'gt
', default=None) or self._fetch_guest_token(None)
193 **self._set_base_headers(),
194 'content
-type': 'application
/json
',
195 'x
-guest
-token
': guest_token,
196 'x
-twitter
-client
-language
': 'en
',
197 'x
-twitter
-active
-user
': 'yes
',
198 'Referer
': 'https
://twitter
.com
/',
199 'Origin
': 'https
://twitter
.com
',
202 def build_login_json(*subtask_inputs):
204 'flow_token
': self._flow_token,
205 'subtask_inputs
': subtask_inputs
206 }, separators=(',', ':')).encode()
208 def input_dict(subtask_id, text):
210 'subtask_id
': subtask_id,
217 next_subtask = self._call_login_api(
218 'Downloading flow token
', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
220 while not self.is_logged_in:
221 if next_subtask == 'LoginJsInstrumentationSubtask
':
222 next_subtask = self._call_login_api(
223 'Submitting JS instrumentation response
', headers, data=build_login_json({
224 'subtask_id
': next_subtask,
225 'js_instrumentation
': {
231 elif next_subtask == 'LoginEnterUserIdentifierSSO
':
232 next_subtask = self._call_login_api(
233 'Submitting username
', headers, data=build_login_json({
234 'subtask_id
': next_subtask,
236 'setting_responses
': [{
237 'key
': 'user_identifier
',
248 elif next_subtask == 'LoginEnterAlternateIdentifierSubtask
':
249 next_subtask = self._call_login_api(
250 'Submitting alternate identifier
', headers,
251 data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
252 'one of username
, phone number
or email that was
not used
as --username
'))))
254 elif next_subtask == 'LoginEnterPassword
':
255 next_subtask = self._call_login_api(
256 'Submitting password
', headers, data=build_login_json({
257 'subtask_id
': next_subtask,
259 'password
': password,
264 elif next_subtask == 'AccountDuplicationCheck
':
265 next_subtask = self._call_login_api(
266 'Submitting account duplication check
', headers, data=build_login_json({
267 'subtask_id
': next_subtask,
268 'check_logged_in_account
': {
269 'link
': 'AccountDuplicationCheck_false
'
273 elif next_subtask == 'LoginTwoFactorAuthChallenge
':
274 next_subtask = self._call_login_api(
275 'Submitting
2FA token
', headers, data=build_login_json(input_dict(
276 next_subtask, self._get_tfa_info('two
-factor authentication token
'))))
278 elif next_subtask == 'LoginAcid
':
279 next_subtask = self._call_login_api(
280 'Submitting confirmation code
', headers, data=build_login_json(input_dict(
281 next_subtask, self._get_tfa_info('confirmation code sent to your email
or phone
'))))
283 elif next_subtask == 'ArkoseLogin
':
284 self.raise_login_required('Twitter
is requiring captcha
for this login attempt
', method='cookies
')
286 elif next_subtask == 'DenyLoginSubtask
':
287 self.raise_login_required('Twitter rejected this login attempt
as suspicious
', method='cookies
')
289 elif next_subtask == 'LoginSuccessSubtask
':
290 raise ExtractorError('Twitter API did
not grant auth token cookie
')
293 raise ExtractorError(f'Unrecognized subtask ID
"{next_subtask}"')
297 def _call_api(self, path, video_id, query={}, graphql=False):
298 headers = self._set_base_headers(legacy=not graphql and self._configuration_arg('legacy_api
'))
300 'x
-twitter
-auth
-type': 'OAuth2Session
',
301 'x
-twitter
-client
-language
': 'en
',
302 'x
-twitter
-active
-user
': 'yes
',
303 } if self.is_logged_in else {
304 'x
-guest
-token
': self._fetch_guest_token(video_id)
306 allowed_status = {400, 401, 403, 404} if graphql else {403}
307 result = self._download_json(
308 (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
309 video_id, headers=headers, query=query, expected_status=allowed_status,
310 note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON
')
312 if result.get('errors
'):
313 errors = ', '.join(set(traverse_obj(result, ('errors
', ..., 'message
', {str}))))
314 if errors and 'not authorized
' in errors:
315 self.raise_login_required(remove_end(errors, '.'))
316 raise ExtractorError(f'Error(s
) while querying API
: {errors or "Unknown error"}
')
320 def _build_graphql_query(self, media_id):
321 raise NotImplementedError('Method must be implemented to support GraphQL
')
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL *endpoint* for *media_id* and return its ``data`` part.

    The mapping produced by ``_build_graphql_query`` is turned into URL
    query parameters by serializing each value as compact JSON (no
    whitespace after separators). The request itself is delegated to
    ``_call_api`` with ``graphql=True``; ``traverse_obj`` then picks the
    ``'data'`` key out of the response.
    """
    query = {}
    for name, payload in self._build_graphql_query(media_id).items():
        query[name] = json.dumps(payload, separators=(',', ':'))
    response = self._call_api(endpoint, media_id, query=query, graphql=True)
    return traverse_obj(response, 'data')
329 class TwitterCardIE(InfoExtractor):
330 IE_NAME = 'twitter
:card
'
331 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i
/(?
:cards
/tfw
/v1|
videos(?
:/tweet
)?
)/(?P
<id>\d
+)'
334 'url
': 'https
://twitter
.com
/i
/cards
/tfw
/v1
/560070183650213889',
335 # MD5 checksums are different in different places
337 'id': '560070131976392705',
339 'title
': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments
from your perspective
.",
340 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
341 'uploader': 'Twitter',
342 'uploader_id': 'Twitter',
343 'thumbnail': r're:^https?://.*\.jpg',
345 'timestamp': 1422366112,
346 'upload_date': '20150127',
348 'comment_count': int,
352 'display_id': '560070183650213889',
353 'uploader_url': 'https://twitter.com/Twitter',
357 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
358 'md5': '7137eca597f72b9abbe61e5ae0161399',
360 'id': '623160978427936768',
362 'title': "NASA
- Fly over Pluto
's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
363 'description
': "Fly over Pluto's icy Norgay Mountains
and Sputnik Plain
in this
@NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
365 'uploader_id': 'NASA',
366 'timestamp': 1437408129,
367 'upload_date': '20150720',
368 'uploader_url': 'https://twitter.com/NASA',
370 'comment_count': int,
373 'tags': ['PlutoFlyby'],
375 'params': {'format': '[protocol=https]'}
378 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
379 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
383 'title': 'Ubuntu 11.10 Overview',
384 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
385 'upload_date': '20111013',
386 'uploader': 'OMG! UBUNTU!',
387 'uploader_id': 'omgubuntu',
388 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
389 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
390 'channel_follower_count': int,
391 'chapters': 'count:8',
392 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
394 'categories': ['Film & Animation'],
396 'comment_count': int,
397 'availability': 'public',
399 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
402 'channel': 'OMG! UBUNTU!',
403 'playable_in_embed': True,
405 'add_ie': ['Youtube'],
408 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
412 'upload_date': '20151113',
413 'uploader_id': '1189339351084113920',
414 'uploader': 'ArsenalTerje',
415 'title': 'Vine by ArsenalTerje',
416 'timestamp': 1447451307,
417 'alt_title': 'Vine by ArsenalTerje',
418 'comment_count': int,
420 'thumbnail': r
're:^https?://[^?#]+\.jpg',
425 'params': {'skip_download': 'm3u8'}
,
428 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
429 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
431 'id': '705235433198714880',
433 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
434 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
435 'uploader': 'Brent Yarina',
436 'uploader_id': 'BTNBrentYarina',
437 'timestamp': 1456976204,
438 'upload_date': '20160303',
440 'skip': 'This content is no longer available.',
443 'url': 'https://twitter.com/i/videos/752274308186120192',
444 'only_matching': True,
def _real_extract(self, url):
    """Hand a card/video URL over to ``TwitterIE``.

    The id matched from *url* is appended to the ``/statuses/`` URL form
    and returned as a URL result tagged with ``TwitterIE``'s key, so the
    actual extraction is performed by that extractor.
    """
    tweet_id = self._match_id(url)
    status_url = 'https://twitter.com/statuses/' + tweet_id
    return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
455 class TwitterIE(TwitterBaseIE
):
457 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
460 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
462 'id': '643211870443208704',
463 'display_id': '643211948184596480',
465 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
466 'thumbnail': r
're:^https?://.*\.jpg',
467 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
468 'uploader': 'FREE THE NIPPLE',
469 'uploader_id': 'freethenipple',
471 'timestamp': 1442188653,
472 'upload_date': '20150913',
473 'uploader_url': 'https://twitter.com/freethenipple',
474 'comment_count': int,
482 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
483 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
485 'id': '657991469417025536',
487 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
488 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
489 'thumbnail': r
're:^https?://.*\.png',
491 'uploader_id': 'giphz',
493 'expected_warnings': ['height', 'width'],
494 'skip': 'Account suspended',
496 'url': 'https://twitter.com/starwars/status/665052190608723968',
498 'id': '665052190608723968',
499 'display_id': '665052190608723968',
501 'title': r
're:Star Wars.*A new beginning is coming December 18.*',
502 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
503 'uploader_id': 'starwars',
504 'uploader': r
're:Star Wars.*',
505 'timestamp': 1447395772,
506 'upload_date': '20151113',
507 'uploader_url': 'https://twitter.com/starwars',
508 'comment_count': int,
511 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
515 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
517 'id': '705235433198714880',
519 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
520 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
521 'uploader_id': 'BTNBrentYarina',
522 'uploader': 'Brent Yarina',
523 'timestamp': 1456976204,
524 'upload_date': '20160303',
525 'uploader_url': 'https://twitter.com/BTNBrentYarina',
526 'comment_count': int,
533 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
534 # Test case of TwitterCardIE
535 'skip_download': True,
537 'skip': 'Dead external link',
539 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
541 'id': '700207414000242688',
542 'display_id': '700207533655363584',
544 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
545 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
546 'thumbnail': r
're:^https?://.*\.jpg',
547 'uploader': 'jaydin donte geer',
548 'uploader_id': 'jaydingeer',
550 'timestamp': 1455777459,
551 'upload_date': '20160218',
552 'uploader_url': 'https://twitter.com/jaydingeer',
553 'comment_count': int,
557 'tags': ['Damndaniel'],
561 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
562 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
566 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
567 'uploader': 'TAKUMA',
568 'uploader_id': '1004126642786242560',
569 'timestamp': 1402826626,
570 'upload_date': '20140615',
571 'thumbnail': r
're:^https?://.*\.jpg',
572 'alt_title': 'Vine by TAKUMA',
573 'comment_count': int,
580 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
582 'id': '717462543795523584',
583 'display_id': '719944021058060289',
585 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
586 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
587 'uploader_id': 'CaptainAmerica',
588 'uploader': 'Captain America',
590 'timestamp': 1460483005,
591 'upload_date': '20160412',
592 'uploader_url': 'https://twitter.com/CaptainAmerica',
593 'thumbnail': r
're:^https?://.*\.jpg',
594 'comment_count': int,
602 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
604 'id': '1zqKVVlkqLaKB',
606 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
607 'upload_date': '20160923',
608 'uploader_id': '1PmKqpJdOJQoY',
609 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
610 'timestamp': 1474613214,
611 'thumbnail': r
're:^https?://.*\.jpg',
613 'add_ie': ['Periscope'],
615 # has mp4 formats via mobile API
616 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
618 'id': '852077943283097602',
620 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
621 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
622 'uploader': 'عالم الأخبار',
623 'uploader_id': 'news_al3alm',
625 'timestamp': 1492000653,
626 'upload_date': '20170412',
627 'display_id': '852138619213144067',
629 'uploader_url': 'https://twitter.com/news_al3alm',
630 'thumbnail': r
're:^https?://.*\.jpg',
635 'comment_count': int,
638 'url': 'https://twitter.com/i/web/status/910031516746514432',
640 'id': '910030238373089285',
641 'display_id': '910031516746514432',
643 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
644 'thumbnail': r
're:^https?://.*\.jpg',
645 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
646 'uploader': 'Préfet de Guadeloupe',
647 'uploader_id': 'Prefet971',
649 'timestamp': 1505803395,
650 'upload_date': '20170919',
651 'uploader_url': 'https://twitter.com/Prefet971',
652 'comment_count': int,
660 'skip_download': True, # requires ffmpeg
663 # card via api.twitter.com/1.1/videos/tweet/config
664 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
666 'id': '1001551417340022785',
667 'display_id': '1001551623938805763',
669 'title': 're:.*?Shep is on a roll today.*?',
670 'thumbnail': r
're:^https?://.*\.jpg',
671 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
672 'uploader': 'Lis Power',
673 'uploader_id': 'LisPower1',
675 'timestamp': 1527623489,
676 'upload_date': '20180529',
677 'uploader_url': 'https://twitter.com/LisPower1',
678 'comment_count': int,
686 'skip_download': True, # requires ffmpeg
689 'url': 'https://twitter.com/foobar/status/1087791357756956680',
691 'id': '1087791272830607360',
692 'display_id': '1087791357756956680',
694 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
695 'thumbnail': r
're:^https?://.*\.jpg',
696 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
700 'timestamp': 1548184644,
701 'upload_date': '20190122',
702 'uploader_url': 'https://twitter.com/X',
703 'comment_count': int,
711 # not available in Periscope
712 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
714 'id': '1vOGwqejwoWxB',
716 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
718 'uploader_id': '1eVjYOLGkGrQL',
719 'thumbnail': r
're:^https?://.*\.jpg',
720 'tags': ['EduTECH2019'],
723 'add_ie': ['TwitterBroadcast'],
726 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
728 'id': '1349774757969989634',
729 'display_id': '1349794411333394432',
731 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
732 'thumbnail': r
're:^https?://.*\.jpg',
733 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
734 'uploader': 'Brooklyn Nets',
735 'uploader_id': 'BrooklynNets',
737 'timestamp': 1610651040,
738 'upload_date': '20210114',
739 'uploader_url': 'https://twitter.com/BrooklynNets',
740 'comment_count': int,
747 'skip_download': True,
750 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
752 'id': '1577855447914409984',
753 'display_id': '1577855540407197696',
755 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
756 'description': 'md5:b9c3699335447391d11753ab21c70a74',
757 'upload_date': '20221006',
758 'uploader': 'oshtru',
759 'uploader_id': 'oshtru',
760 'uploader_url': 'https://twitter.com/oshtru',
761 'thumbnail': r
're:^https?://.*\.jpg',
763 'timestamp': 1665025050,
764 'comment_count': int,
771 'params': {'skip_download': True}
,
773 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
775 'id': '1577719286659006464',
776 'title': 'Ultima📛 | #вʟм - Test',
777 'description': 'Test https://t.co/Y3KEZD7Dad',
778 'uploader': 'Ultima📛 | #вʟм',
779 'uploader_id': 'UltimaShadowX',
780 'uploader_url': 'https://twitter.com/UltimaShadowX',
781 'upload_date': '20221005',
782 'timestamp': 1664992565,
783 'comment_count': int,
790 'params': {'skip_download': True}
,
792 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
794 'id': '1575559336759263233',
795 'display_id': '1575560063510810624',
797 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
798 'thumbnail': r
're:^https?://.*\.jpg',
799 'description': 'md5:95aea692fda36a12081b9629b02daa92',
800 'uploader': 'Max Olson',
801 'uploader_id': 'MesoMax919',
802 'uploader_url': 'https://twitter.com/MesoMax919',
804 'timestamp': 1664477766,
805 'upload_date': '20220929',
806 'comment_count': int,
810 'tags': ['HurricaneIan'],
814 # Adult content, fails if not logged in (GraphQL)
815 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
817 'id': '1575199163847000068',
818 'display_id': '1575199173472927762',
823 'uploader_id': 'Rizdraws',
824 'uploader_url': 'https://twitter.com/Rizdraws',
825 'upload_date': '20220928',
826 'timestamp': 1664391723,
827 'thumbnail': r
're:^https?://.+\.jpg',
830 'comment_count': int,
834 'skip': 'Requires authentication',
836 # Playlist result only with auth
837 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
838 'playlist_mincount': 2,
840 'id': '1395079556562706435',
845 'upload_date': '20210519',
848 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
849 'uploader_id': 'Srirachachau',
850 'comment_count': int,
851 'uploader_url': 'https://twitter.com/Srirachachau',
852 'timestamp': 1621447860,
855 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
856 'playlist_mincount': 2,
858 'id': '1578353380363501568',
860 'uploader_id': 'DavidToons_',
864 'timestamp': 1665143744,
865 'uploader_url': 'https://twitter.com/DavidToons_',
866 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
868 'comment_count': int,
869 'upload_date': '20221007',
873 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
876 'id': '1578401165338976258',
878 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
880 'uploader_id': 'primevideouk',
881 'timestamp': 1665155137,
882 'upload_date': '20221007',
884 'uploader_url': 'https://twitter.com/primevideouk',
885 'comment_count': int,
888 'tags': ['TheRingsOfPower'],
892 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
894 'id': '1lPJqmBeeNAJb',
896 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
897 'uploader': r
're:Monique Camarra.+?',
898 'uploader_id': 'MoniqueCamarra',
899 'live_status': 'was_live',
900 'release_timestamp': 1658417414,
901 'description': 'md5:4dc8e972f1d8b3c6580376fabb02a3ad',
902 'timestamp': 1658407771,
903 'release_date': '20220721',
904 'upload_date': '20220721',
906 'add_ie': ['TwitterSpaces'],
907 'params': {'skip_download': 'm3u8'}
,
908 'skip': 'Requires authentication',
910 # URL specifies video number but --yes-playlist
911 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
912 'playlist_mincount': 2,
914 'id': '1600649710662213632',
915 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
916 'timestamp': 1670459604.0,
917 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
918 'comment_count': int,
919 'uploader_id': 'CTVJLaidlaw',
921 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
922 'upload_date': '20221208',
924 'uploader': 'Jocelyn Laidlaw',
925 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
929 # URL specifies video number and --no-playlist
930 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
932 'id': '1600649511827013632',
934 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
935 'thumbnail': r
're:^https?://.+\.jpg',
936 'timestamp': 1670459604.0,
937 'uploader_id': 'CTVJLaidlaw',
938 'uploader': 'Jocelyn Laidlaw',
940 'comment_count': int,
941 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
943 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
944 'display_id': '1600649710662213632',
947 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
948 'upload_date': '20221208',
951 'params': {'noplaylist': True}
,
953 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
954 # note the id different between extraction and url
955 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
957 'id': '1621117577354424321',
958 'display_id': '1621117700482416640',
960 'title': '뽀 - 아 최우제 이동속도 봐',
961 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
964 'uploader_id': 's2FAKER',
965 'uploader_url': 'https://twitter.com/s2FAKER',
966 'upload_date': '20230202',
967 'timestamp': 1675339553.0,
968 'thumbnail': r
're:https?://pbs\.twimg\.com/.+',
973 'comment_count': int,
977 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
979 'id': '1599108643743473680',
980 'display_id': '1599108751385972737',
982 'title': '\u06ea - \U0001F48B',
983 'uploader_url': 'https://twitter.com/hlo_again',
985 'uploader_id': 'hlo_again',
986 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
989 'comment_count': int,
991 'upload_date': '20221203',
993 'timestamp': 1670092210.0,
995 'uploader': '\u06ea',
996 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
998 'params': {'noplaylist': True}
,
1000 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1002 'id': '1600009362759733248',
1003 'display_id': '1600009574919962625',
1005 'uploader_url': 'https://twitter.com/MunTheShinobi',
1006 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1008 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1010 'uploader': 'Mün The Friend Of YWAP',
1011 'repost_count': int,
1012 'upload_date': '20221206',
1013 'title': 'Mün The Friend Of YWAP - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1014 'comment_count': int,
1017 'uploader_id': 'MunTheShinobi',
1018 'duration': 139.987,
1019 'timestamp': 1670306984.0,
1022 # url to retweet id w/ legacy api
1023 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1025 'id': '1623274794488659969',
1026 'display_id': '1623739803874349067',
1028 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1029 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1030 'uploader': 'Johnny Bullets',
1031 'uploader_id': 'Johnnybull3ts',
1032 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1036 'timestamp': 1675853859.0,
1037 'upload_date': '20230208',
1038 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1040 'repost_count': int,
1042 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}
}},
1043 'skip': 'Protected tweet',
1045 # orig tweet w/ graphql
1046 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1048 'id': '1623274794488659969',
1049 'display_id': '1623739803874349067',
1051 'title': '@selfisekai@hackerspace.pl 🐀 - RT @Johnnybull3ts: Me after going viral to over 30million people: Whoopsie-daisy',
1052 'description': 'md5:9258bdbb54793bdc124fe1cd47e96c6a',
1053 'uploader': '@selfisekai@hackerspace.pl 🐀',
1054 'uploader_id': 'liberdalau',
1055 'uploader_url': 'https://twitter.com/liberdalau',
1059 'timestamp': 1675964711.0,
1060 'upload_date': '20230209',
1061 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1064 'repost_count': int,
1065 'comment_count': int,
1067 'skip': 'Protected tweet',
1070 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1071 'only_matching': True,
1074 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1075 'only_matching': True,
1077 # promo_video_website card
1078 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1079 'only_matching': True,
1081 # promo_video_convo card
1082 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1083 'only_matching': True,
1086 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1087 'only_matching': True,
1089 # video_direct_message card
1090 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1091 'only_matching': True,
1093 # poll2choice_video card
1094 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1095 'only_matching': True,
1097 # poll3choice_video card
1098 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1099 'only_matching': True,
1101 # poll4choice_video card
1102 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1103 'only_matching': True,
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL tweet response into a legacy-style status dict.

    The tweet result is looked up in ``data`` (the lookup path depends on
    whether the session is logged in), checked for tombstone/unavailable
    markers, and then flattened: ``user``, ``card`` and ``quoted_status``
    are copied onto the ``legacy`` dict, and the card's ``binding_values``
    list is turned into a ``key -> value`` mapping.

    :param data: GraphQL response payload for the tweet
    :param twid: tweet ID, used to find the entry and in warnings
    :return: the legacy status dict (empty if no ``legacy`` object exists)
    :raises ExtractorError: if the tweet is a tombstone or is unavailable
    """
    if self.is_logged_in:
        # Logged-in responses hold a whole conversation; pick the entry for this tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    if typename not in ('Tweet', 'TweetTombstone', 'TweetUnavailable', None):
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        cause = remove_end(traverse_obj(result, ('tombstone', 'text', 'text', {str})), '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)
    elif typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Only accept a real dict here: `result.get('legacy', {})` would hand back
    # None (or any other non-dict) when the key is present but null, and the
    # `.update()` below would then crash
    status = traverse_obj(result, ('legacy', {dict})) or {}
    status.update(traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={}))

    # extra transformation is needed since result does not match legacy format
    binding_values = {
        binding_value.get('key'): binding_value.get('value')
        for binding_value in traverse_obj(status, ('card', 'binding_values', ..., {dict}))
    }
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
1145 def _build_graphql_query(self
, media_id
):
1148 'focalTweetId': media_id
,
1149 'includePromotedContent': True,
1150 'with_rux_injections': False,
1151 'withBirdwatchNotes': True,
1152 'withCommunity': True,
1153 'withDownvotePerspective': False,
1154 'withQuickPromoteEligibilityTweetFields': True,
1155 'withReactionsMetadata': False,
1156 'withReactionsPerspective': False,
1157 'withSuperFollowsTweetFields': True,
1158 'withSuperFollowsUserFields': True,
1159 'withV2Timeline': True,
1163 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
1164 'interactive_text_enabled': True,
1165 'responsive_web_edit_tweet_api_enabled': True,
1166 'responsive_web_enhance_cards_enabled': True,
1167 'responsive_web_graphql_timeline_navigation_enabled': False,
1168 'responsive_web_text_conversations_enabled': False,
1169 'responsive_web_uc_gql_enabled': True,
1170 'standardized_nudges_misinfo': True,
1171 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
1172 'tweetypie_unmention_optimization_enabled': True,
1173 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
1174 'verified_phone_label_enabled': False,
1175 'vibe_api_enabled': True,
1177 } if self
.is_logged_in
else {
1179 'tweetId': media_id
,
1180 'withCommunity': False,
1181 'includePromotedContent': False,
1185 'creator_subscriptions_tweet_preview_api_enabled': True,
1186 'tweetypie_unmention_optimization_enabled': True,
1187 'responsive_web_edit_tweet_api_enabled': True,
1188 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
1189 'view_counts_everywhere_api_enabled': True,
1190 'longform_notetweets_consumption_enabled': True,
1191 'responsive_web_twitter_article_tweet_consumption_enabled': False,
1192 'tweet_awards_web_tipping_enabled': False,
1193 'freedom_of_speech_not_reach_fetch_enabled': True,
1194 'standardized_nudges_misinfo': True,
1195 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
1196 'longform_notetweets_rich_text_read_enabled': True,
1197 'longform_notetweets_inline_media_enabled': True,
1198 'responsive_web_graphql_exclude_directive_enabled': True,
1199 'verified_phone_label_enabled': False,
1200 'responsive_web_media_download_video_enabled': False,
1201 'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
1202 'responsive_web_graphql_timeline_navigation_enabled': True,
1203 'responsive_web_enhance_cards_enabled': False
1206 'withArticleRichContentState': False
def _extract_status(self, twid):
    """Fetch the status dict for tweet ``twid``.

    Logged-in sessions use the ``TweetDetail`` GraphQL query. Otherwise the
    ``TweetResultByRestId`` GraphQL query is used, or the legacy
    ``statuses/show`` endpoint when the ``legacy_api`` extractor argument is
    set. If that attempt fails with an unexpected ExtractorError, a warning
    is reported and the syndication endpoint is used instead.
    """
    if self.is_logged_in:
        detail = self._call_graphql_api('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid)
        return self._graphql_to_legacy(detail, twid)

    try:
        if not self._configuration_arg('legacy_api'):
            guest_result = self._call_graphql_api('2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId', twid)
            return self._graphql_to_legacy(guest_result, twid)

        legacy_query = {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }
        legacy_status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)
        # Prefer the embedded retweeted status; the `None` path falls back to the response itself
        return traverse_obj(legacy_status, 'retweeted_status', None)

    except ExtractorError as e:
        # Expected errors are final; only unexpected ones trigger the fallback
        if e.expected:
            raise
        self.report_warning(
            f'{e.orig_msg}. Falling back to syndication endpoint; some metadata may be missing', twid)

    syndication_status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query={'id': twid})
    # Expose the syndication media list under the key the legacy format uses
    syndication_status['extended_entities'] = {'media': syndication_status.get('mediaDetails')}
    return syndication_status
def _real_extract(self, url):
    """Extract the video(s) attached to a tweet.

    Returns a single info dict, a playlist of entries, or a URL result
    pointing at the tweet's first expanded link when the tweet itself has
    no media or card entries.

    Raises ExtractorError when a specific media index was requested but is
    missing or is not a video.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    # Tweet text (newlines flattened) serves as both title and description
    title = description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', title)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    # Metadata shared by every entry produced from this tweet
    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        # Build the per-media part of an entry (formats, subtitles, thumbnails, stats)
        # ID preference: explicit id fields, then the number in a variant URL, then the tweet ID
        media_id = traverse_obj(media, 'id_str', 'id', (
            'video_info', 'variants', ..., 'url',
            {functools.partial(re.search, r'_video/(\d+)/')}, 1
        ), get_all=False, expected_type=str_or_none) or twid
        self.write_debug(f'Extracting from video info: {media_id}')

        formats = []
        subtitles = {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            fmts, subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, subs)
            formats.extend(fmts)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            def add_thumbnail(name, size):
                # Each size variant is the same media URL with a different `name` query value
                thumbnails.append({
                    'id': name,
                    'url': update_url_query(media_url, {'name': name}),
                    'width': int_or_none(size.get('w') or size.get('width')),
                    'height': int_or_none(size.get('h') or size.get('height')),
                })
            for name, size in media.get('sizes', {}).items():
                add_thumbnail(name, size)
            add_thumbnail('orig', media.get('original_info') or {})

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # The codec of http formats are unknown
            '_format_sort_fields': ('res', 'br', 'size', 'proto'),
        }

    def extract_from_card_info(card):
        # Generator yielding entries derived from the tweet's card; yields nothing without a card
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # A binding value stores its payload under '<type>_value', e.g. 'string_value'
            o = binding_values.get(k) or {}
            return try_get(o, lambda x: x[x['type'].lower() + '_value'])

        # The card type is the last ':'-separated component of its name
        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            # The unified card is a JSON string holding its own media entities
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
            content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                # Skip missing images and the generic placeholder
                if not image_url or '/player-placeholder' in image_url:
                    continue
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    # Non-photo media of the tweet itself and of the quoted tweet
    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    else:
        # selected_index is 1-based and counts every media item, photos included
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        elif desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') != desired_obj.get('id'):
                continue
            if index == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{index}'
            break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]
    if not entries:
        # Nothing embedded: hand over to whatever the tweet's first link points at
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            # Reached only when raise_no_formats returns instead of raising
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    # Several entries: number the titles and return them as a playlist
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Twitter Amplify pages hosted on amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's ``twitter:*`` meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the `twitter:<target>:width` / `:height` meta tags as ints (or None)
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # The format list may be empty (e.g. no usable vmap URL on the page);
        # indexing it unconditionally would raise IndexError
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for twitter.com/i/broadcasts/<id> pages."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and return the info dict."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's `type` query parameter (if any) is used as the format id
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for Twitter Spaces (twitter.com/i/spaces/<id>)."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Lower-cased Space `state` -> live_status value reported in the info dict
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the GraphQL query payload for the Space ``space_id``."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {
            'variables': variables,
            'features': features,
        }

    def _real_extract(self, url):
        """Extract audio formats and metadata for a Space; requires a logged-in session."""
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space:
            raise ExtractorError('Twitter Space not found', expected=True)

        meta = space['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[meta['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not is_live and not meta.get('is_space_available_for_replay'):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif meta.get('media_key'):
            media_key = meta['media_key']
            stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
            source = traverse_obj(
                stream_status,
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                # XXX: Some Spaces need ffmpeg as downloader
                formats = self._extract_m3u8_formats(
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)
            # Spaces are audio-only
            for fmt in formats:
                fmt['vcodec'] = 'none'
                fmt['acodec'] = 'aac'
                if not is_live:
                    fmt['container'] = 'm4a_dash'

        speaker_names = traverse_obj(
            space, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if not formats and live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        uploader = traverse_obj(meta, (*creator_path, 'name'))
        uploader_id = traverse_obj(meta, (*creator_path, 'screen_name'))
        release_timestamp = try_call(
            lambda: int_or_none(meta['scheduled_start'], scale=1000))

        return {
            'id': space_id,
            'title': meta.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': uploader,
            'uploader_id': uploader_id,
            'live_status': live_status,
            'release_timestamp': release_timestamp,
            'timestamp': int_or_none(meta.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve t.co short links (or ``tco:<id>`` pseudo-URLs) to their target URL."""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host `t.co` matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Request the short link and hand the final URL to the matching extractor."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # `tco:<id>` form: build the real t.co URL from the bare ID
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading warning-page prefix; later occurrences of the
            # same text inside the target URL must be left untouched
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)