5 from .common
import InfoExtractor
6 from .periscope
import PeriscopeBaseIE
, PeriscopeIE
7 from ..compat
import functools
# isort: split
10 compat_urllib_parse_unquote
,
11 compat_urllib_parse_urlparse
,
34 class TwitterBaseIE(InfoExtractor
):
35 _NETRC_MACHINE
= 'twitter'
36 _API_BASE
= 'https://api.twitter.com/1.1/'
37 _GRAPHQL_API_BASE
= 'https://twitter.com/i/api/graphql/'
38 _BASE_REGEX
= r
'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
39 _AUTH
= 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
40 _LEGACY_AUTH
= 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
43 _LOGIN_INIT_DATA
= json
.dumps({
46 'debug_overrides': {},
55 'app_download_cta': 1,
56 'check_logged_in_account': 1,
57 'choice_selection': 3,
58 'contacts_live_sync_permission_prompt': 0,
60 'email_verification': 2,
70 'in_app_notification': 1,
72 'js_instrumentation': 1,
74 'notifications_permission_prompt': 2,
76 'open_home_timeline': 1,
78 'phone_verification': 4,
87 'tweet_selection_urt': 1,
90 'user_recommendations_list': 4,
91 'user_recommendations_urt': 1,
95 }, separators
=(',', ':')).encode()
97 def _extract_variant_formats(self
, variant
, video_id
):
98 variant_url
= variant
.get('url')
101 elif '.m3u8' in variant_url
:
102 return self
._extract
_m
3u8_formats
_and
_subtitles
(
103 variant_url
, video_id
, 'mp4', 'm3u8_native',
104 m3u8_id
='hls', fatal
=False)
106 tbr
= int_or_none(dict_get(variant
, ('bitrate', 'bit_rate')), 1000) or None
109 'format_id': 'http' + ('-%d' % tbr
if tbr
else ''),
112 self
._search
_dimensions
_in
_video
_url
(f
, variant_url
)
115 def _extract_formats_from_vmap_url(self
, vmap_url
, video_id
):
116 vmap_url
= url_or_none(vmap_url
)
119 vmap_data
= self
._download
_xml
(vmap_url
, video_id
)
123 for video_variant
in vmap_data
.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
124 video_variant
.attrib
['url'] = compat_urllib_parse_unquote(
125 video_variant
.attrib
['url'])
126 urls
.append(video_variant
.attrib
['url'])
127 fmts
, subs
= self
._extract
_variant
_formats
(
128 video_variant
.attrib
, video_id
)
130 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
131 video_url
= strip_or_none(xpath_text(vmap_data
, './/MediaFile'))
132 if video_url
not in urls
:
133 fmts
, subs
= self
._extract
_variant
_formats
({'url': video_url}
, video_id
)
135 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
136 return formats
, subtitles
139 def _search_dimensions_in_video_url(a_format
, video_url
):
140 m
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
)
143 'width': int(m
.group('width')),
144 'height': int(m
.group('height')),
def is_logged_in(self):
    """Tell whether an ``auth_token`` cookie is present for the API base URL.

    Fetches the cookies associated with ``self._API_BASE`` through
    ``self._get_cookies`` and returns the truthiness of the ``auth_token``
    entry as a ``bool``.
    """
    api_cookies = self._get_cookies(self._API_BASE)
    auth_cookie = api_cookies.get('auth_token')
    return bool(auth_cookie)
151 @functools.cached_property
152 def _selected_api(self
):
153 return self
._configuration
_arg
('api', ['graphql'], ie_key
='Twitter')[0]
155 def _fetch_guest_token(self
, display_id
):
156 guest_token
= traverse_obj(self
._download
_json
(
157 f
'{self._API_BASE}guest/activate.json', display_id
, 'Downloading guest token', data
=b
'',
158 headers
=self
._set
_base
_headers
(legacy
=display_id
and self
._selected
_api
== 'legacy')),
159 ('guest_token', {str}
))
161 raise ExtractorError('Could not retrieve guest token')
164 def _set_base_headers(self
, legacy
=False):
165 bearer_token
= self
._LEGACY
_AUTH
if legacy
and not self
.is_logged_in
else self
._AUTH
167 'Authorization': f
'Bearer {bearer_token}',
168 'x-csrf-token': try_call(lambda: self
._get
_cookies
(self
._API
_BASE
)['ct0'].value
),
171 def _call_login_api(self
, note
, headers
, query
={}, data
=None):
172 response
= self
._download
_json
(
173 f
'{self._API_BASE}onboarding/task.json', None, note
,
174 headers
=headers
, query
=query
, data
=data
, expected_status
=400)
175 error
= traverse_obj(response
, ('errors', 0, 'message', {str}
))
177 raise ExtractorError(f
'Login failed, Twitter API says: {error}', expected
=True)
178 elif traverse_obj(response
, 'status') != 'success':
179 raise ExtractorError('Login was unsuccessful')
181 subtask
= traverse_obj(
182 response
, ('subtasks', ..., 'subtask_id', {str}
), get_all
=False)
184 raise ExtractorError('Twitter API did not return next login subtask')
186 self
._flow
_token
= response
['flow_token']
190 def _perform_login(self
, username
, password
):
191 if self
.is_logged_in
:
194 webpage
= self
._download
_webpage
('https://twitter.com/', None, 'Downloading login page')
195 guest_token
= self
._search
_regex
(
196 r
'\.cookie\s*=\s*["\']gt
=(\d
+);', webpage, 'gt
', default=None) or self._fetch_guest_token(None)
198 **self._set_base_headers(),
199 'content
-type': 'application
/json
',
200 'x
-guest
-token
': guest_token,
201 'x
-twitter
-client
-language
': 'en
',
202 'x
-twitter
-active
-user
': 'yes
',
203 'Referer
': 'https
://twitter
.com
/',
204 'Origin
': 'https
://twitter
.com
',
207 def build_login_json(*subtask_inputs):
209 'flow_token
': self._flow_token,
210 'subtask_inputs
': subtask_inputs
211 }, separators=(',', ':')).encode()
213 def input_dict(subtask_id, text):
215 'subtask_id
': subtask_id,
222 next_subtask = self._call_login_api(
223 'Downloading flow token
', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)
225 while not self.is_logged_in:
226 if next_subtask == 'LoginJsInstrumentationSubtask
':
227 next_subtask = self._call_login_api(
228 'Submitting JS instrumentation response
', headers, data=build_login_json({
229 'subtask_id
': next_subtask,
230 'js_instrumentation
': {
236 elif next_subtask == 'LoginEnterUserIdentifierSSO
':
237 next_subtask = self._call_login_api(
238 'Submitting username
', headers, data=build_login_json({
239 'subtask_id
': next_subtask,
241 'setting_responses
': [{
242 'key
': 'user_identifier
',
253 elif next_subtask == 'LoginEnterAlternateIdentifierSubtask
':
254 next_subtask = self._call_login_api(
255 'Submitting alternate identifier
', headers,
256 data=build_login_json(input_dict(next_subtask, self._get_tfa_info(
257 'one of username
, phone number
or email that was
not used
as --username
'))))
259 elif next_subtask == 'LoginEnterPassword
':
260 next_subtask = self._call_login_api(
261 'Submitting password
', headers, data=build_login_json({
262 'subtask_id
': next_subtask,
264 'password
': password,
269 elif next_subtask == 'AccountDuplicationCheck
':
270 next_subtask = self._call_login_api(
271 'Submitting account duplication check
', headers, data=build_login_json({
272 'subtask_id
': next_subtask,
273 'check_logged_in_account
': {
274 'link
': 'AccountDuplicationCheck_false
'
278 elif next_subtask == 'LoginTwoFactorAuthChallenge
':
279 next_subtask = self._call_login_api(
280 'Submitting
2FA token
', headers, data=build_login_json(input_dict(
281 next_subtask, self._get_tfa_info('two
-factor authentication token
'))))
283 elif next_subtask == 'LoginAcid
':
284 next_subtask = self._call_login_api(
285 'Submitting confirmation code
', headers, data=build_login_json(input_dict(
286 next_subtask, self._get_tfa_info('confirmation code sent to your email
or phone
'))))
288 elif next_subtask == 'ArkoseLogin
':
289 self.raise_login_required('Twitter
is requiring captcha
for this login attempt
', method='cookies
')
291 elif next_subtask == 'DenyLoginSubtask
':
292 self.raise_login_required('Twitter rejected this login attempt
as suspicious
', method='cookies
')
294 elif next_subtask == 'LoginSuccessSubtask
':
295 raise ExtractorError('Twitter API did
not grant auth token cookie
')
298 raise ExtractorError(f'Unrecognized subtask ID
"{next_subtask}"')
302 def _call_api(self, path, video_id, query={}, graphql=False):
303 headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy
')
305 'x
-twitter
-auth
-type': 'OAuth2Session
',
306 'x
-twitter
-client
-language
': 'en
',
307 'x
-twitter
-active
-user
': 'yes
',
308 } if self.is_logged_in else {
309 'x
-guest
-token
': self._fetch_guest_token(video_id)
311 allowed_status = {400, 401, 403, 404} if graphql else {403}
312 result = self._download_json(
313 (self._GRAPHQL_API_BASE if graphql else self._API_BASE) + path,
314 video_id, headers=headers, query=query, expected_status=allowed_status,
315 note=f'Downloading {"GraphQL" if graphql else "legacy API"} JSON
')
317 if result.get('errors
'):
318 errors = ', '.join(set(traverse_obj(result, ('errors
', ..., 'message
', {str}))))
319 if errors and 'not authorized
' in errors:
320 self.raise_login_required(remove_end(errors, '.'))
321 raise ExtractorError(f'Error(s
) while querying API
: {errors or "Unknown error"}
')
325 def _build_graphql_query(self, media_id):
326 raise NotImplementedError('Method must be implemented to support GraphQL
')
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL *endpoint* for *media_id* and return its ``data`` member.

    The mapping returned by ``self._build_graphql_query`` is turned into query
    parameters, each value serialised as compact JSON (no spaces after the
    separators). The request goes through ``self._call_api`` with
    ``graphql=True``, and ``traverse_obj`` picks ``'data'`` out of the response.
    """
    query_params = {}
    for name, payload in self._build_graphql_query(media_id).items():
        query_params[name] = json.dumps(payload, separators=(',', ':'))
    response = self._call_api(endpoint, media_id, query=query_params, graphql=True)
    return traverse_obj(response, 'data')
334 class TwitterCardIE(InfoExtractor):
335 IE_NAME = 'twitter
:card
'
336 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i
/(?
:cards
/tfw
/v1|
videos(?
:/tweet
)?
)/(?P
<id>\d
+)'
339 'url
': 'https
://twitter
.com
/i
/cards
/tfw
/v1
/560070183650213889',
340 # MD5 checksums are different in different places
342 'id': '560070131976392705',
344 'title
': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments
from your perspective
.",
345 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
346 'uploader': 'Twitter',
347 'uploader_id': 'Twitter',
348 'thumbnail': r're:^https?://.*\.jpg',
350 'timestamp': 1422366112,
351 'upload_date': '20150127',
353 'comment_count': int,
357 'display_id': '560070183650213889',
358 'uploader_url': 'https://twitter.com/Twitter',
362 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
363 'md5': '7137eca597f72b9abbe61e5ae0161399',
365 'id': '623160978427936768',
367 'title': "NASA
- Fly over Pluto
's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
368 'description
': "Fly over Pluto's icy Norgay Mountains
and Sputnik Plain
in this
@NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
370 'uploader_id': 'NASA',
371 'timestamp': 1437408129,
372 'upload_date': '20150720',
373 'uploader_url': 'https://twitter.com/NASA',
375 'comment_count': int,
378 'tags': ['PlutoFlyby'],
380 'params': {'format': '[protocol=https]'}
383 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
384 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
388 'title': 'Ubuntu 11.10 Overview',
389 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
390 'upload_date': '20111013',
391 'uploader': 'OMG! UBUNTU!',
392 'uploader_id': 'omgubuntu',
393 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
394 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
395 'channel_follower_count': int,
396 'chapters': 'count:8',
397 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
399 'categories': ['Film & Animation'],
401 'comment_count': int,
402 'availability': 'public',
404 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
407 'channel': 'OMG! UBUNTU!',
408 'playable_in_embed': True,
410 'add_ie': ['Youtube'],
413 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
417 'upload_date': '20151113',
418 'uploader_id': '1189339351084113920',
419 'uploader': 'ArsenalTerje',
420 'title': 'Vine by ArsenalTerje',
421 'timestamp': 1447451307,
422 'alt_title': 'Vine by ArsenalTerje',
423 'comment_count': int,
425 'thumbnail': r
're:^https?://[^?#]+\.jpg',
430 'params': {'skip_download': 'm3u8'}
,
433 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
434 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
436 'id': '705235433198714880',
438 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
439 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
440 'uploader': 'Brent Yarina',
441 'uploader_id': 'BTNBrentYarina',
442 'timestamp': 1456976204,
443 'upload_date': '20160303',
445 'skip': 'This content is no longer available.',
448 'url': 'https://twitter.com/i/videos/752274308186120192',
449 'only_matching': True,
def _real_extract(self, url):
    """Hand the URL over to ``TwitterIE`` via a ``statuses/<id>`` URL.

    The id is taken from *url* with ``self._match_id`` and used both to build
    the target URL and as the id argument passed to ``self.url_result``.
    """
    tweet_id = self._match_id(url)
    target_url = 'https://twitter.com/statuses/' + tweet_id
    return self.url_result(target_url, TwitterIE.ie_key(), tweet_id)
460 class TwitterIE(TwitterBaseIE
):
462 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
465 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
467 'id': '643211870443208704',
468 'display_id': '643211948184596480',
470 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
471 'thumbnail': r
're:^https?://.*\.jpg',
472 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
473 'uploader': 'FREE THE NIPPLE',
474 'uploader_id': 'freethenipple',
476 'timestamp': 1442188653,
477 'upload_date': '20150913',
478 'uploader_url': 'https://twitter.com/freethenipple',
479 'comment_count': int,
484 '_old_archive_ids': ['twitter 643211948184596480'],
487 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
488 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
490 'id': '657991469417025536',
492 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
493 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
494 'thumbnail': r
're:^https?://.*\.png',
496 'uploader_id': 'giphz',
498 'expected_warnings': ['height', 'width'],
499 'skip': 'Account suspended',
501 'url': 'https://twitter.com/starwars/status/665052190608723968',
503 'id': '665052190608723968',
504 'display_id': '665052190608723968',
506 'title': r
're:Star Wars.*A new beginning is coming December 18.*',
507 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
508 'uploader_id': 'starwars',
509 'uploader': r
're:Star Wars.*',
510 'timestamp': 1447395772,
511 'upload_date': '20151113',
512 'uploader_url': 'https://twitter.com/starwars',
513 'comment_count': int,
516 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
518 '_old_archive_ids': ['twitter 665052190608723968'],
521 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
523 'id': '705235433198714880',
525 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
526 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
527 'uploader_id': 'BTNBrentYarina',
528 'uploader': 'Brent Yarina',
529 'timestamp': 1456976204,
530 'upload_date': '20160303',
531 'uploader_url': 'https://twitter.com/BTNBrentYarina',
532 'comment_count': int,
539 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
540 # Test case of TwitterCardIE
541 'skip_download': True,
543 'skip': 'Dead external link',
545 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
547 'id': '700207414000242688',
548 'display_id': '700207533655363584',
550 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
551 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
552 'thumbnail': r
're:^https?://.*\.jpg',
553 'uploader': 'jaydin donte geer',
554 'uploader_id': 'jaydingeer',
556 'timestamp': 1455777459,
557 'upload_date': '20160218',
558 'uploader_url': 'https://twitter.com/jaydingeer',
559 'comment_count': int,
562 'tags': ['Damndaniel'],
564 '_old_archive_ids': ['twitter 700207533655363584'],
567 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
568 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
572 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
573 'uploader': 'TAKUMA',
574 'uploader_id': '1004126642786242560',
575 'timestamp': 1402826626,
576 'upload_date': '20140615',
577 'thumbnail': r
're:^https?://.*\.jpg',
578 'alt_title': 'Vine by TAKUMA',
579 'comment_count': int,
586 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
588 'id': '717462543795523584',
589 'display_id': '719944021058060289',
591 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
592 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
593 'uploader_id': 'CaptainAmerica',
594 'uploader': 'Captain America',
596 'timestamp': 1460483005,
597 'upload_date': '20160412',
598 'uploader_url': 'https://twitter.com/CaptainAmerica',
599 'thumbnail': r
're:^https?://.*\.jpg',
600 'comment_count': int,
605 '_old_archive_ids': ['twitter 719944021058060289'],
608 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
610 'id': '1zqKVVlkqLaKB',
612 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
613 'upload_date': '20160923',
614 'uploader_id': '1PmKqpJdOJQoY',
615 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
616 'timestamp': 1474613214,
617 'thumbnail': r
're:^https?://.*\.jpg',
619 'add_ie': ['Periscope'],
620 'skip': 'Broadcast not found',
622 # has mp4 formats via mobile API
623 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
625 'id': '852077943283097602',
627 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
628 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
629 'uploader': 'عالم الأخبار',
630 'uploader_id': 'news_al3alm',
632 'timestamp': 1492000653,
633 'upload_date': '20170412',
634 'display_id': '852138619213144067',
636 'uploader_url': 'https://twitter.com/news_al3alm',
637 'thumbnail': r
're:^https?://.*\.jpg',
641 'comment_count': int,
642 '_old_archive_ids': ['twitter 852138619213144067'],
645 'url': 'https://twitter.com/i/web/status/910031516746514432',
647 'id': '910030238373089285',
648 'display_id': '910031516746514432',
650 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
651 'thumbnail': r
're:^https?://.*\.jpg',
652 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
653 'uploader': 'Préfet de Guadeloupe',
654 'uploader_id': 'Prefet971',
656 'timestamp': 1505803395,
657 'upload_date': '20170919',
658 'uploader_url': 'https://twitter.com/Prefet971',
659 'comment_count': int,
664 '_old_archive_ids': ['twitter 910031516746514432'],
667 'skip_download': True, # requires ffmpeg
670 # card via api.twitter.com/1.1/videos/tweet/config
671 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
673 'id': '1001551417340022785',
674 'display_id': '1001551623938805763',
676 'title': 're:.*?Shep is on a roll today.*?',
677 'thumbnail': r
're:^https?://.*\.jpg',
678 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
679 'uploader': 'Lis Power',
680 'uploader_id': 'LisPower1',
682 'timestamp': 1527623489,
683 'upload_date': '20180529',
684 'uploader_url': 'https://twitter.com/LisPower1',
685 'comment_count': int,
690 '_old_archive_ids': ['twitter 1001551623938805763'],
693 'skip_download': True, # requires ffmpeg
696 'url': 'https://twitter.com/foobar/status/1087791357756956680',
698 'id': '1087791272830607360',
699 'display_id': '1087791357756956680',
701 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
702 'thumbnail': r
're:^https?://.*\.jpg',
703 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
707 'timestamp': 1548184644,
708 'upload_date': '20190122',
709 'uploader_url': 'https://twitter.com/X',
710 'comment_count': int,
717 'skip': 'This Tweet is unavailable',
719 # not available in Periscope
720 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
722 'id': '1vOGwqejwoWxB',
724 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
726 'uploader_id': '1eVjYOLGkGrQL',
727 'thumbnail': r
're:^https?://.*\.jpg',
728 'tags': ['EduTECH2019'],
731 'add_ie': ['TwitterBroadcast'],
732 'skip': 'Broadcast no longer exists',
735 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
737 'id': '1349774757969989634',
738 'display_id': '1349794411333394432',
740 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
741 'thumbnail': r
're:^https?://.*\.jpg',
742 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
743 'uploader': 'Brooklyn Nets',
744 'uploader_id': 'BrooklynNets',
746 'timestamp': 1610651040,
747 'upload_date': '20210114',
748 'uploader_url': 'https://twitter.com/BrooklynNets',
749 'comment_count': int,
754 '_old_archive_ids': ['twitter 1349794411333394432'],
757 'skip_download': True,
760 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
762 'id': '1577855447914409984',
763 'display_id': '1577855540407197696',
765 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
766 'description': 'md5:b9c3699335447391d11753ab21c70a74',
767 'upload_date': '20221006',
768 'uploader': 'oshtru',
769 'uploader_id': 'oshtru',
770 'uploader_url': 'https://twitter.com/oshtru',
771 'thumbnail': r
're:^https?://.*\.jpg',
773 'timestamp': 1665025050,
774 'comment_count': int,
779 '_old_archive_ids': ['twitter 1577855540407197696'],
781 'params': {'skip_download': True}
,
783 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
785 'id': '1577719286659006464',
786 'title': 'Ultima - Test',
787 'description': 'Test https://t.co/Y3KEZD7Dad',
788 'uploader': 'Ultima',
789 'uploader_id': 'UltimaShadowX',
790 'uploader_url': 'https://twitter.com/UltimaShadowX',
791 'upload_date': '20221005',
792 'timestamp': 1664992565,
793 'comment_count': int,
800 'params': {'skip_download': True}
,
802 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
804 'id': '1575559336759263233',
805 'display_id': '1575560063510810624',
807 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
808 'thumbnail': r
're:^https?://.*\.jpg',
809 'description': 'md5:95aea692fda36a12081b9629b02daa92',
810 'uploader': 'Max Olson',
811 'uploader_id': 'MesoMax919',
812 'uploader_url': 'https://twitter.com/MesoMax919',
814 'timestamp': 1664477766,
815 'upload_date': '20220929',
816 'comment_count': int,
819 'tags': ['HurricaneIan'],
821 '_old_archive_ids': ['twitter 1575560063510810624'],
824 # Adult content, fails if not logged in
825 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
827 'id': '1575199163847000068',
828 'display_id': '1575199173472927762',
833 'uploader_id': 'Rizdraws',
834 'uploader_url': 'https://twitter.com/Rizdraws',
835 'upload_date': '20220928',
836 'timestamp': 1664391723,
837 'thumbnail': r
're:^https?://.+\.jpg',
840 'comment_count': int,
844 'params': {'skip_download': 'The media could not be played'}
,
845 'skip': 'Requires authentication',
847 # Playlist result only with graphql API
848 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
849 'playlist_mincount': 2,
851 'id': '1395079556562706435',
856 'upload_date': '20210519',
859 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
860 'uploader_id': 'Srirachachau',
861 'comment_count': int,
862 'uploader_url': 'https://twitter.com/Srirachachau',
863 'timestamp': 1621447860,
866 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
867 'playlist_mincount': 2,
869 'id': '1578353380363501568',
871 'uploader_id': 'DavidToons_',
875 'timestamp': 1665143744,
876 'uploader_url': 'https://twitter.com/DavidToons_',
877 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
879 'comment_count': int,
880 'upload_date': '20221007',
884 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
887 'id': '1578401165338976258',
889 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
891 'uploader_id': 'primevideouk',
892 'timestamp': 1665155137,
893 'upload_date': '20221007',
895 'uploader_url': 'https://twitter.com/primevideouk',
896 'comment_count': int,
899 'tags': ['TheRingsOfPower'],
903 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
905 'id': '1lPJqmBeeNAJb',
907 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
908 'uploader': r
're:Monique Camarra.+?',
909 'uploader_id': 'MoniqueCamarra',
910 'live_status': 'was_live',
911 'release_timestamp': 1658417414,
912 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
913 'timestamp': 1658407771,
914 'release_date': '20220721',
915 'upload_date': '20220721',
917 'add_ie': ['TwitterSpaces'],
918 'params': {'skip_download': 'm3u8'}
,
919 'skip': 'Requires authentication',
921 # URL specifies video number but --yes-playlist
922 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
923 'playlist_mincount': 2,
925 'id': '1600649710662213632',
926 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
927 'timestamp': 1670459604.0,
928 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
929 'comment_count': int,
930 'uploader_id': 'CTVJLaidlaw',
932 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
933 'upload_date': '20221208',
935 'uploader': 'Jocelyn Laidlaw',
936 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
940 # URL specifies video number and --no-playlist
941 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
943 'id': '1600649511827013632',
945 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
946 'thumbnail': r
're:^https?://.+\.jpg',
947 'timestamp': 1670459604.0,
948 'uploader_id': 'CTVJLaidlaw',
949 'uploader': 'Jocelyn Laidlaw',
951 'comment_count': int,
952 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
954 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
955 'display_id': '1600649710662213632',
957 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
958 'upload_date': '20221208',
960 '_old_archive_ids': ['twitter 1600649710662213632'],
962 'params': {'noplaylist': True}
,
964 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
965 # note the id different between extraction and url
966 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
968 'id': '1621117577354424321',
969 'display_id': '1621117700482416640',
971 'title': '뽀 - 아 최우제 이동속도 봐',
972 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
975 'uploader_id': 's2FAKER',
976 'uploader_url': 'https://twitter.com/s2FAKER',
977 'upload_date': '20230202',
978 'timestamp': 1675339553.0,
979 'thumbnail': r
're:https?://pbs\.twimg\.com/.+',
984 'comment_count': int,
985 '_old_archive_ids': ['twitter 1621117700482416640'],
988 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
990 'id': '1599108643743473680',
991 'display_id': '1599108751385972737',
993 'title': '\u06ea - \U0001F48B',
994 'uploader_url': 'https://twitter.com/hlo_again',
996 'uploader_id': 'hlo_again',
997 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
1000 'comment_count': int,
1001 'upload_date': '20221203',
1003 'timestamp': 1670092210.0,
1005 'uploader': '\u06ea',
1006 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
1007 '_old_archive_ids': ['twitter 1599108751385972737'],
1009 'params': {'noplaylist': True}
,
1011 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
1013 'id': '1600009362759733248',
1014 'display_id': '1600009574919962625',
1016 'uploader_url': 'https://twitter.com/MunTheShinobi',
1017 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1018 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1021 'repost_count': int,
1022 'upload_date': '20221206',
1023 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1024 'comment_count': int,
1027 'uploader_id': 'MunTheShinobi',
1028 'duration': 139.987,
1029 'timestamp': 1670306984.0,
1030 '_old_archive_ids': ['twitter 1600009574919962625'],
1033 # retweeted_status (private)
1034 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1036 'id': '1623274794488659969',
1037 'display_id': '1623739803874349067',
1039 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1040 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1041 'uploader': 'Johnny Bullets',
1042 'uploader_id': 'Johnnybull3ts',
1043 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1047 'timestamp': 1675853859.0,
1048 'upload_date': '20230208',
1049 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1051 'repost_count': int,
1053 'skip': 'Protected tweet',
1056 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1058 'id': '1694928337846538240',
1060 'display_id': '1695424220702888009',
1061 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1062 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1063 'uploader': 'Benny Johnson',
1064 'uploader_id': 'bennyjohnson',
1065 'uploader_url': 'https://twitter.com/bennyjohnson',
1069 'timestamp': 1692962814.0,
1070 'upload_date': '20230825',
1071 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1073 'repost_count': int,
1074 'comment_count': int,
1075 '_old_archive_ids': ['twitter 1695424220702888009'],
1078 # retweeted_status w/ legacy API
1079 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1081 'id': '1694928337846538240',
1083 'display_id': '1695424220702888009',
1084 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1085 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1086 'uploader': 'Benny Johnson',
1087 'uploader_id': 'bennyjohnson',
1088 'uploader_url': 'https://twitter.com/bennyjohnson',
1092 'timestamp': 1692962814.0,
1093 'upload_date': '20230825',
1094 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1096 'repost_count': int,
1097 '_old_archive_ids': ['twitter 1695424220702888009'],
1099 'params': {'extractor_args': {'twitter': {'api': ['legacy']}
}},
1101 # Broadcast embedded in tweet
1102 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1104 'id': '1rmxPMjLzAXKN',
1106 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1107 'uploader': 'Jessica Dobson',
1108 'uploader_id': 'JessicaDobsonWX',
1109 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1110 'timestamp': 1701566398,
1111 'upload_date': '20231203',
1112 'live_status': 'was_live',
1113 'thumbnail': r
're:https://[^/]+pscp\.tv/.+\.jpg',
1114 'concurrent_view_count': int,
1117 'add_ie': ['TwitterBroadcast'],
1119 # Animated gif and quote tweet video, with syndication API
1120 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1121 'playlist_mincount': 2,
1123 'id': '1696256659889565950',
1124 'title': 'BAKOON - https://t.co/zom968d0a0',
1125 'description': 'https://t.co/zom968d0a0',
1127 'uploader': 'BAKOON',
1128 'uploader_id': 'BAKKOOONN',
1129 'uploader_url': 'https://twitter.com/BAKKOOONN',
1131 'timestamp': 1693254077.0,
1132 'upload_date': '20230828',
1135 'params': {'extractor_args': {'twitter': {'api': ['syndication']}
}},
1136 'expected_warnings': ['Not all metadata'],
1138 # "stale tweet" with typename "TweetWithVisibilityResults"
1139 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1140 'md5': '62b1e11cdc2cdd0e527f83adb081f536',
1142 'id': '1724883339285544960',
1144 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1145 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1146 'display_id': '1724884212803834154',
1147 'uploader': 'Robert F. Kennedy Jr',
1148 'uploader_id': 'RobertKennedyJr',
1149 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1150 'upload_date': '20231115',
1151 'timestamp': 1700079417.0,
1152 'duration': 341.048,
1153 'thumbnail': r
're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1154 'tags': ['Kennedy24'],
1155 'repost_count': int,
1157 'comment_count': int,
1159 '_old_archive_ids': ['twitter 1724884212803834154'],
1163 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1164 'only_matching': True,
1167 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1168 'only_matching': True,
1170 # promo_video_website card
1171 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1172 'only_matching': True,
1174 # promo_video_convo card
1175 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1176 'only_matching': True,
1179 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1180 'only_matching': True,
1182 # video_direct_message card
1183 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1184 'only_matching': True,
1186 # poll2choice_video card
1187 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1188 'only_matching': True,
1190 # poll3choice_video card
1191 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1192 'only_matching': True,
1194 # poll4choice_video card
1195 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1196 'only_matching': True,
# Captures the run of digits between "_video/" and the next "/" in a URL;
# `_extract_status` applies it to variant URLs to derive a media id.
_MEDIA_ID_RE = re.compile(
    r'_video/(\d+)/')
def _GRAPHQL_ENDPOINT(self):
    """Return the GraphQL endpoint path to query for a tweet.

    The `TweetDetail` endpoint is returned when `self.is_logged_in` is
    truthy, the `TweetResultByRestId` endpoint otherwise.
    """
    # NOTE(review): `_extract_status` reads this attribute without calling it,
    # so it is presumably exposed through `@property` - confirm the decorator
    # is present directly above this definition.
    return (
        'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' if self.is_logged_in
        else '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId')
def _graphql_to_legacy(self, data, twid):
    """Reshape a GraphQL tweet response into the legacy status layout.

    The tweet result is looked up in `data` (the lookup path depends on
    `self.is_logged_in`). Its `legacy` dict is then updated in place with
    the user, card, quoted-status and retweeted-status dicts found in the
    result, and returned.

    Raises ExtractorError when the result carries a tombstone or has the
    typename `TweetUnavailable`; for the `NsfwLoggedOut` and `Protected`
    reasons `self.raise_login_required` is called first.
    """
    if self.is_logged_in:
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    typename = result.get('__typename')
    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    status = result.get('legacy', {})
    nested_parts = traverse_obj(result, {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }, expected_type=dict, default={})
    status.update(nested_parts)

    # extra transformations needed since result does not match legacy format
    retweeted = status.get('retweeted_status')
    if retweeted:
        retweeted['user'] = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict})) or {}

    # Collapse the list of {key, value} bindings into a single key -> value mapping
    card_bindings = {}
    for binding in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        card_bindings[binding.get('key')] = binding.get('value')
    if card_bindings:
        status['card']['binding_values'] = card_bindings

    return status
def _build_graphql_query(self, media_id):
    """Build the GraphQL query payload for the tweet `media_id`.

    Returns a dict with `variables` and `features`. When
    `self.is_logged_in` is truthy the tweet is referenced as
    `focalTweetId`; otherwise it is referenced as `tweetId` and the
    payload additionally carries `fieldToggles`.
    """
    if self.is_logged_in:
        return {
            'variables': {
                'focalTweetId': media_id,
                'includePromotedContent': True,
                'with_rux_injections': False,
                'withBirdwatchNotes': True,
                'withCommunity': True,
                'withDownvotePerspective': False,
                'withQuickPromoteEligibilityTweetFields': True,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withSuperFollowsTweetFields': True,
                'withSuperFollowsUserFields': True,
                'withV2Timeline': True,
                'withVoice': True,
            },
            'features': {
                'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_graphql_timeline_navigation_enabled': False,
                'responsive_web_text_conversations_enabled': False,
                'responsive_web_uc_gql_enabled': True,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'tweetypie_unmention_optimization_enabled': True,
                'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
                'verified_phone_label_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    return {
        'variables': {
            'tweetId': media_id,
            'withCommunity': False,
            'includePromotedContent': False,
            'withVoice': False,
        },
        'features': {
            'creator_subscriptions_tweet_preview_api_enabled': True,
            'tweetypie_unmention_optimization_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
            'view_counts_everywhere_api_enabled': True,
            'longform_notetweets_consumption_enabled': True,
            'responsive_web_twitter_article_tweet_consumption_enabled': False,
            'tweet_awards_web_tipping_enabled': False,
            'freedom_of_speech_not_reach_fetch_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
            'longform_notetweets_rich_text_read_enabled': True,
            'longform_notetweets_inline_media_enabled': True,
            'responsive_web_graphql_exclude_directive_enabled': True,
            'verified_phone_label_enabled': False,
            'responsive_web_media_download_video_enabled': False,
            'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
            'responsive_web_graphql_timeline_navigation_enabled': True,
            'responsive_web_enhance_cards_enabled': False
        },
        'fieldToggles': {
            'withArticleRichContentState': False
        }
    }
def _extract_status(self, twid):
    """Fetch the status dict for tweet `twid` from the selected API.

    GraphQL is used when logged in or when `self._selected_api` is
    'graphql'; otherwise 'legacy' or 'syndication' is used as selected.
    The syndication response is reshaped so that its media list sits under
    `extended_entities`, like the other two sources.

    Returns the `retweeted_status` dict when the status has one, else the
    status itself, else an empty dict.

    Raises ExtractorError for an unknown API selection or an empty
    syndication response.
    """
    if self.is_logged_in or self._selected_api == 'graphql':
        graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
        status = self._graphql_to_legacy(graphql_data, twid)

    elif self._selected_api == 'legacy':
        legacy_query = {
            'cards_platform': 'Web-12',
            'include_cards': 1,
            'include_reply_count': 1,
            'include_user_entities': 0,
            'tweet_mode': 'extended',
        }
        status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_query)

    elif self._selected_api == 'syndication':
        self.report_warning(
            'Not all metadata or media is available via syndication endpoint', twid, only_once=True)
        token_alphabet = '123456789abcdefghijklmnopqrstuvwxyz'
        status = self._download_json(
            'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
            headers={'User-Agent': 'Googlebot'}, query={
                'id': twid,
                # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
                'token': ''.join(random.choices(token_alphabet, k=10)),
            })
        if not status:
            raise ExtractorError('Syndication endpoint returned empty JSON response')

        # Transform the result so its structure matches that of legacy/graphql
        media = []
        media_details = traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict}))
        for detail in media_details:
            # Use the id found in a variant URL; fall back to the tweet id
            variant_media_id = traverse_obj(detail, (
                'video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1), get_all=False)
            detail['id_str'] = variant_media_id or twid
            media.append(detail)
        status['extended_entities'] = {'media': media}

    else:
        raise ExtractorError(f'"{self._selected_api}" is not a valid API selection', expected=True)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
def _real_extract(self, url):
    """Extract the video(s) attached to the tweet at `url`.

    Shared metadata is built from the tweet status. Entries then come from
    the non-photo media of the tweet and its quoted tweet, plus the tweet's
    card. If there are none, the first expanded URL of the tweet is
    delegated via `url_result`, or `raise_no_formats` is called.

    When a video index is given in the URL and `_yes_playlist` declines the
    playlist, only that media item is returned.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', description)
    user = status.get('user') or {}
    uploader = user.get('name')
    if uploader:
        title = f'{uploader} - {title}'
    uploader_id = user.get('screen_name')

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        # Build the per-video fields (formats, subtitles, thumbnails, ...) for one media dict
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats, subtitles = [], {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            variant_formats, variant_subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, variant_subs)
            formats.extend(variant_formats)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # One thumbnail per named size, followed by the original image
            named_sizes = [
                *media.get('sizes', {}).items(),
                ('orig', media.get('original_info') or {}),
            ]
            thumbnails = [{
                'id': name,
                'url': update_url_query(media_url, {'name': name}),
                'width': int_or_none(size.get('w') or size.get('width')),
                'height': int_or_none(size.get('h') or size.get('height')),
            } for name, size in named_sizes]

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        # Generator: yields the entries described by the tweet's card, if there is one
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # The value is stored under "<type>_value", with <type> taken from the binding itself
            binding = binding_values.get(k) or {}
            return try_get(binding, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]

        if card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
            return

        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
            return

        if card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
            return

        if card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
            return

        if card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
            return

        if card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
            return

        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        is_amplify = card_name == 'amplify'
        if is_amplify:
            vmap_url = get_binding_value('amplify_url_vmap')
        else:
            vmap_url = get_binding_value('player_stream_url')
        content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
        formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

        thumbnails = []
        for suffix in ('_small', '', '_large', '_x_large', '_original'):
            image = get_binding_value('player_image' + suffix) or {}
            image_url = image.get('url')
            if image_url and '/player-placeholder' not in image_url:
                thumbnails.append({
                    'id': suffix[1:] if suffix else 'medium',
                    'url': image_url,
                    'width': int_or_none(image.get('width')),
                    'height': int_or_none(image.get('height')),
                })

        yield {
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'duration': int_or_none(get_binding_value(
                'content_duration_seconds')),
        }

    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        if desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        for index, entry in enumerate(videos, 1):
            if entry.get('id') == desired_obj.get('id'):
                if index == 1:
                    info['_old_archive_ids'] = [make_archive_id(self, twid)]
                if len(videos) != 1:
                    info['title'] += f' #{index}'
                break

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    extracted = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    entries = [{**info, **data, 'display_id': twid} for data in extracted]

    if not entries:
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    # Several entries: number the titles and return them as a playlist
    for index, entry in enumerate(entries, 1):
        entry['title'] += f' #{index}'

    return self.playlist_result(entries, **info)
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for `amp.twimg.com/v/<id>` pages."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats from the page's VMAP URL and a thumbnail from its meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        def meta_dimensions(target):
            # (width, height) read from the `twitter:<target>:width/height` meta tags
            return tuple(
                int_or_none(self._html_search_meta(
                    'twitter:%s:%s' % (target, side), webpage, fatal=False))
                for side in ('width', 'height'))

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail_url = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail_url:
            image_width, image_height = meta_dimensions('image')
            thumbnails.append({
                'url': thumbnail_url,
                'width': image_width,
                'height': image_height,
            })

        # The player dimensions are applied to the first format only
        player_width, player_height = meta_dimensions('player')
        formats[0].update({
            'width': player_width,
            'height': player_height,
        })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for `i/broadcasts/<id>` URLs."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the broadcast via the API and attach its m3u8 formats.

        Upcoming broadcasts are returned without formats. Raises
        ExtractorError when the API returns no data for the broadcast.
        """
        broadcast_id = self._match_id(url)
        response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast take precedence over the parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The format id is the `type` query parameter of the playlist URL, if any
        playlist_query = compat_parse_qs(compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for `i/spaces/<id>` URLs."""
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            'timestamp': 1659877956,
            'upload_date': '20220807',
            'release_timestamp': 1659904215,
            'release_date': '20220807',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # post_live/TimedOut but downloadable
        'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
        'info_dict': {
            'id': '1vAxRAVQWONJl',
            'ext': 'm4a',
            'title': 'Framing Up FinOps: Billing Tools',
            'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
            'uploader': 'Google Cloud',
            'uploader_id': 'googlecloud',
            'live_status': 'post_live',
            'timestamp': 1681409554,
            'upload_date': '20230413',
            'release_timestamp': 1681839000,
            'release_date': '20230418',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
        'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
        'info_dict': {
            'id': '1eaKbrQbjoRKX',
            'ext': 'm4a',
            'title': 'あ',
            'description': 'Twitter Space participated by nobody yet',
            'uploader': '息根とめる🔪Twitchで復活',
            'uploader_id': 'tomeru_ikinone',
            'live_status': 'was_live',
            'timestamp': 1685617198,
            'upload_date': '20230601',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased `state` of a Space's metadata to a live_status value
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Build the `variables`/`features` GraphQL payload for the Space `space_id`."""
        variables = {
            'id': space_id,
            'isMetatagsQuery': True,
            'withDownvotePerspective': False,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withReplays': True,
            'withSuperFollowsUserFields': True,
            'withSuperFollowsTweetFields': True,
        }
        features = {
            'dont_mention_me_view_api_enabled': True,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_uc_gql_enabled': True,
            'spaces_2022_h2_clipping': True,
            'spaces_2022_h2_spaces_communities': False,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    def _real_extract(self, url):
        """Extract the audio formats and metadata of a Space.

        Requires a logged-in session. Raises ExtractorError when the API
        returns no Space data; `raise_no_formats` is called for Spaces that
        have not started, have replay disabled, or are not downloadable yet.
        """
        space_id = self._match_id(url)
        if not self.is_logged_in:
            self.raise_login_required('Twitter Spaces require authentication')
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])
        is_live = live_status == 'is_live'

        formats = []
        headers = {'Referer': 'https://twitter.com/'}
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif not (is_live or metadata.get('is_space_available_for_replay')):
            self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
        elif metadata.get('media_key'):
            media_key = metadata['media_key']
            source = traverse_obj(
                self._call_api(f'live_video_stream/status/{media_key}', media_key),
                ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
            if source:
                formats = self._extract_m3u8_formats(  # XXX: Some Spaces need ffmpeg as downloader
                    source, media_key, 'm4a', entry_protocol='m3u8', live=is_live,
                    headers=headers, fatal=False)

            # Every format is tagged as audio-only; non-live ones also get a container hint
            audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
            if not is_live:
                audio_fields['container'] = 'm4a_dash'
            for fmt in formats:
                fmt.update(audio_fields)

        speaker_names = traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))
        participants = ', '.join(speaker_names) or 'nobody yet'

        if not formats and live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

        creator_path = ('creator_results', 'result', 'legacy')
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(metadata, (*creator_path, 'name')),
            'uploader_id': traverse_obj(metadata, (*creator_path, 'screen_name')),
            'live_status': live_status,
            'release_timestamp': try_call(
                lambda: int_or_none(metadata['scheduled_start'], scale=1000)),
            'timestamp': int_or_none(metadata.get('created_at'), scale=1000),
            'formats': formats,
            'http_headers': headers,
        }
class TwitterShortenerIE(TwitterBaseIE):
    """Resolver for `t.co` short links and `tco:<id>` pseudo-URLs."""
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Request the short link and hand the final URL to `url_result`."""
        eid, short_id = self._match_valid_url(url).group('eid', 'id')
        if eid:
            # `tco:<id>` form: build the real t.co URL from the id
            short_id = eid
            url = self._BASE_URL + short_id

        resolved_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).url

        # Drop the "unsafe link" warning page prefix so only the target URL remains
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if resolved_url.startswith(unsafe_link_prefix):
            resolved_url = resolved_url.replace(unsafe_link_prefix, '')
        return self.url_result(resolved_url)