4 from .common
import InfoExtractor
5 from .periscope
import PeriscopeBaseIE
, PeriscopeIE
6 from ..compat
import functools
# isort: split
9 compat_urllib_parse_unquote
,
10 compat_urllib_parse_urlparse
,
32 class TwitterBaseIE(InfoExtractor
):
33 _API_BASE
= 'https://api.twitter.com/1.1/'
34 _GRAPHQL_API_BASE
= 'https://twitter.com/i/api/graphql/'
35 _BASE_REGEX
= r
'https?://(?:(?:www|m(?:obile)?)\.)?(?:twitter\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
36 _AUTH
= {'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'}
# Build the format information for a single media variant (a mapping with a 'url' key).
# Visible behaviour:
#   - a URL containing '.m3u8' is handed to _extract_m3u8_formats_and_subtitles
#     (ext 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) and that result is returned;
#   - otherwise an 'http' format is described: tbr comes from the variant's
#     'bitrate' / 'bit_rate' value passed to int_or_none with 1000 as second argument
#     (presumably a scale), the format_id is 'http' or 'http-<tbr>', and
#     _search_dimensions_in_video_url is called with the format and the URL.
# NOTE(review): several lines of this method are missing from this view (including the
# branch that precedes the `elif` and the return of the non-HLS path) -- confirm the
# exact return value against the full file.
39 def _extract_variant_formats(self
, variant
, video_id
):
40 variant_url
= variant
.get('url')
43 elif '.m3u8' in variant_url
:
44 return self
._extract
_m
3u8_formats
_and
_subtitles
(
45 variant_url
, video_id
, 'mp4', 'm3u8_native',
46 m3u8_id
='hls', fatal
=False)
48 tbr
= int_or_none(dict_get(variant
, ('bitrate', 'bit_rate')), 1000) or None
51 'format_id': 'http' + ('-%d' % tbr
if tbr
else ''),
54 self
._search
_dimensions
_in
_video
_url
(f
, variant_url
)
# Collect formats and subtitles from the XML document found at vmap_url.
# Visible behaviour:
#   - vmap_url is passed through url_or_none and the document is fetched with _download_xml;
#   - every videoVariant element (videoVMapV2 namespace) has its 'url' attribute replaced
#     by the result of compat_urllib_parse_unquote, is remembered in `urls`, and its
#     attribute mapping is fed to _extract_variant_formats;
#   - the stripped text of './/MediaFile' is processed the same way when it is not
#     already in `urls`;
#   - subtitles from each step are combined with _merge_subtitles;
#   - the method returns the pair (formats, subtitles).
# NOTE(review): lines are missing from this view (e.g. where formats, subtitles and urls
# are initialised and where fmts is added to formats) -- confirm against the full file.
57 def _extract_formats_from_vmap_url(self
, vmap_url
, video_id
):
58 vmap_url
= url_or_none(vmap_url
)
61 vmap_data
= self
._download
_xml
(vmap_url
, video_id
)
65 for video_variant
in vmap_data
.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
66 video_variant
.attrib
['url'] = compat_urllib_parse_unquote(
67 video_variant
.attrib
['url'])
68 urls
.append(video_variant
.attrib
['url'])
69 fmts
, subs
= self
._extract
_variant
_formats
(
70 video_variant
.attrib
, video_id
)
72 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
73 video_url
= strip_or_none(xpath_text(vmap_data
, './/MediaFile'))
74 if video_url
not in urls
:
75 fmts
, subs
= self
._extract
_variant
_formats
({'url': video_url}
, video_id
)
77 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
78 return formats
, subtitles
# Derive 'width' and 'height' (as ints) from a '/<width>x<height>/' path segment of
# video_url, found with re.search.
# NOTE(review): the signature has no `self`, so this is presumably a @staticmethod whose
# decorator is not visible here; the no-match guard and the statement that applies the
# values to a_format are also missing from this view -- confirm against the full file.
81 def _search_dimensions_in_video_url(a_format
, video_url
):
82 m
= re
.search(r
'/(?P<width>\d+)x(?P<height>\d+)/', video_url
)
85 'width': int(m
.group('width')),
86 'height': int(m
.group('height')),
@functools.cached_property
def is_logged_in(self):
    """Tell whether the cookies for the API base URL include ``auth_token``.

    Being a cached property, the lookup runs once per instance and the
    result is reused on later reads.
    """
    api_cookies = self._get_cookies(self._API_BASE)
    auth_cookie = api_cookies.get('auth_token')
    return bool(auth_cookie)
# Perform a JSON API request against either the legacy endpoint (_API_BASE) or the
# GraphQL endpoint (_GRAPHQL_API_BASE), selected by `graphql`, with `path` appended.
# Visible behaviour:
#   - headers start as a copy of _AUTH; the value of the 'ct0' cookie is sent as
#     'x-csrf-token'; extra 'x-twitter-*' header entries appear under the logged-in check;
#   - when not logged in and no guest token is cached, one is requested from
#     guest/activate.json and stored on self._guest_token; failing to obtain one
#     raises ExtractorError('Could not retrieve guest token');
#   - the request loop runs at most twice (first_attempt True, then False); on the first
#     pass a 'bad guest token' error clears the cached token so it can be refreshed;
#   - GraphQL requests accept HTTP 400/401/403/404 as expected statuses, legacy
#     requests only 403;
#   - when the response has 'errors', their distinct messages are joined and raised
#     as an expected ExtractorError.
# NOTE(review): `query={}` is a mutable default argument. The visible lines only pass it
# through to _download_json, but lines of this method are missing from this view;
# a `query=None` default would rule out accidental sharing between calls.
# NOTE(review): the successful return statement is not visible here -- confirm against
# the full file.
93 def _call_api(self
, path
, video_id
, query
={}, graphql
=False):
94 cookies
= self
._get
_cookies
(self
._API
_BASE
)
95 headers
= self
._AUTH
.copy()
97 csrf_cookie
= cookies
.get('ct0')
99 headers
['x-csrf-token'] = csrf_cookie
.value
101 if self
.is_logged_in
:
103 'x-twitter-auth-type': 'OAuth2Session',
104 'x-twitter-client-language': 'en',
105 'x-twitter-active-user': 'yes',
108 for first_attempt
in (True, False):
109 if not self
.is_logged_in
and not self
._guest
_token
:
110 headers
.pop('x-guest-token', None)
111 self
._guest
_token
= traverse_obj(self
._download
_json
(
112 f
'{self._API_BASE}guest/activate.json', video_id
,
113 'Downloading guest token', data
=b
'', headers
=headers
), 'guest_token')
114 if self
._guest
_token
:
115 headers
['x-guest-token'] = self
._guest
_token
116 elif not self
.is_logged_in
:
117 raise ExtractorError('Could not retrieve guest token')
119 allowed_status
= {400, 401, 403, 404}
if graphql
else {403}
120 result
= self
._download
_json
(
121 (self
._GRAPHQL
_API
_BASE
if graphql
else self
._API
_BASE
) + path
,
122 video_id
, headers
=headers
, query
=query
, expected_status
=allowed_status
,
123 note
=f
'Downloading {"GraphQL" if graphql else "legacy API"} JSON')
125 if result
.get('errors'):
126 errors
= ', '.join(set(traverse_obj(result
, ('errors', ..., 'message', {str}
))))
127 if not self
.is_logged_in
and first_attempt
and 'bad guest token' in errors
.lower():
128 self
.to_screen('Guest token has expired. Refreshing guest token')
129 self
._guest
_token
= None
132 raise ExtractorError(
133 f
'Error(s) while querying API: {errors or "Unknown error"}', expected
=True)
137 def _build_graphql_query(self
, media_id
):
138 raise NotImplementedError('Method must be implemented to support GraphQL')
def _call_graphql_api(self, endpoint, media_id):
    """Query a GraphQL endpoint and return the ``data`` member of the response.

    Each value of the mapping produced by ``_build_graphql_query`` is
    serialised to compact JSON (no spaces after separators) and sent as a
    query parameter under its key.
    """
    query = {}
    for name, payload in self._build_graphql_query(media_id).items():
        query[name] = json.dumps(payload, separators=(',', ':'))
    response = self._call_api(endpoint, media_id, query=query, graphql=True)
    return traverse_obj(response, 'data')
146 class TwitterCardIE(InfoExtractor
):
147 IE_NAME
= 'twitter:card'
148 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
151 'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
152 # MD5 checksums are different in different places
154 'id': '560070131976392705',
156 'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
157 'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
158 'uploader': 'Twitter',
159 'uploader_id': 'Twitter',
160 'thumbnail': r
're:^https?://.*\.jpg',
162 'timestamp': 1422366112,
163 'upload_date': '20150127',
165 'comment_count': int,
169 'display_id': '560070183650213889',
170 'uploader_url': 'https://twitter.com/Twitter',
174 'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
175 'md5': '7137eca597f72b9abbe61e5ae0161399',
177 'id': '623160978427936768',
179 'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
180 'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
182 'uploader_id': 'NASA',
183 'timestamp': 1437408129,
184 'upload_date': '20150720',
185 'uploader_url': 'https://twitter.com/NASA',
187 'comment_count': int,
190 'tags': ['PlutoFlyby'],
192 'params': {'format': '[protocol=https]'}
195 'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
196 'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
200 'title': 'Ubuntu 11.10 Overview',
201 'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
202 'upload_date': '20111013',
203 'uploader': 'OMG! UBUNTU!',
204 'uploader_id': 'omgubuntu',
205 'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
206 'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
207 'channel_follower_count': int,
208 'chapters': 'count:8',
209 'uploader_url': 'http://www.youtube.com/user/omgubuntu',
211 'categories': ['Film & Animation'],
213 'comment_count': int,
214 'availability': 'public',
216 'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
219 'channel': 'OMG! UBUNTU!',
220 'playable_in_embed': True,
222 'add_ie': ['Youtube'],
225 'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
229 'upload_date': '20151113',
230 'uploader_id': '1189339351084113920',
231 'uploader': 'ArsenalTerje',
232 'title': 'Vine by ArsenalTerje',
233 'timestamp': 1447451307,
234 'alt_title': 'Vine by ArsenalTerje',
235 'comment_count': int,
237 'thumbnail': r
're:^https?://[^?#]+\.jpg',
242 'params': {'skip_download': 'm3u8'}
,
245 'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
246 'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
248 'id': '705235433198714880',
250 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
251 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
252 'uploader': 'Brent Yarina',
253 'uploader_id': 'BTNBrentYarina',
254 'timestamp': 1456976204,
255 'upload_date': '20160303',
257 'skip': 'This content is no longer available.',
260 'url': 'https://twitter.com/i/videos/752274308186120192',
261 'only_matching': True,
def _real_extract(self, url):
    """Delegate a card/video URL to TwitterIE through the matching status URL."""
    tweet_id = self._match_id(url)
    status_url = 'https://twitter.com/statuses/' + tweet_id
    return self.url_result(status_url, TwitterIE.ie_key(), tweet_id)
272 class TwitterIE(TwitterBaseIE
):
274 _VALID_URL
= TwitterBaseIE
._BASE
_REGEX
+ r
'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
277 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
279 'id': '643211870443208704',
280 'display_id': '643211948184596480',
282 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
283 'thumbnail': r
're:^https?://.*\.jpg',
284 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
285 'uploader': 'FREE THE NIPPLE',
286 'uploader_id': 'freethenipple',
288 'timestamp': 1442188653,
289 'upload_date': '20150913',
290 'uploader_url': 'https://twitter.com/freethenipple',
291 'comment_count': int,
299 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
300 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
302 'id': '657991469417025536',
304 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
305 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
306 'thumbnail': r
're:^https?://.*\.png',
308 'uploader_id': 'giphz',
310 'expected_warnings': ['height', 'width'],
311 'skip': 'Account suspended',
313 'url': 'https://twitter.com/starwars/status/665052190608723968',
315 'id': '665052190608723968',
316 'display_id': '665052190608723968',
318 'title': r
're:Star Wars.*A new beginning is coming December 18.*',
319 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
320 'uploader_id': 'starwars',
321 'uploader': r
're:Star Wars.*',
322 'timestamp': 1447395772,
323 'upload_date': '20151113',
324 'uploader_url': 'https://twitter.com/starwars',
325 'comment_count': int,
328 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
332 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
334 'id': '705235433198714880',
336 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
337 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
338 'uploader_id': 'BTNBrentYarina',
339 'uploader': 'Brent Yarina',
340 'timestamp': 1456976204,
341 'upload_date': '20160303',
342 'uploader_url': 'https://twitter.com/BTNBrentYarina',
343 'comment_count': int,
350 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
351 # Test case of TwitterCardIE
352 'skip_download': True,
354 'skip': 'Dead external link',
356 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
358 'id': '700207414000242688',
359 'display_id': '700207533655363584',
361 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
362 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
363 'thumbnail': r
're:^https?://.*\.jpg',
364 'uploader': 'jaydin donte geer',
365 'uploader_id': 'jaydingeer',
367 'timestamp': 1455777459,
368 'upload_date': '20160218',
369 'uploader_url': 'https://twitter.com/jaydingeer',
370 'comment_count': int,
374 'tags': ['Damndaniel'],
378 'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
379 'md5': '89a15ed345d13b86e9a5a5e051fa308a',
383 'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
384 'uploader': 'TAKUMA',
385 'uploader_id': '1004126642786242560',
386 'timestamp': 1402826626,
387 'upload_date': '20140615',
388 'thumbnail': r
're:^https?://.*\.jpg',
389 'alt_title': 'Vine by TAKUMA',
390 'comment_count': int,
397 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
399 'id': '717462543795523584',
400 'display_id': '719944021058060289',
402 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
403 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
404 'uploader_id': 'CaptainAmerica',
405 'uploader': 'Captain America',
407 'timestamp': 1460483005,
408 'upload_date': '20160412',
409 'uploader_url': 'https://twitter.com/CaptainAmerica',
410 'thumbnail': r
're:^https?://.*\.jpg',
411 'comment_count': int,
419 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
421 'id': '1zqKVVlkqLaKB',
423 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
424 'upload_date': '20160923',
425 'uploader_id': '1PmKqpJdOJQoY',
426 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
427 'timestamp': 1474613214,
428 'thumbnail': r
're:^https?://.*\.jpg',
430 'add_ie': ['Periscope'],
432 # has mp4 formats via mobile API
433 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
435 'id': '852138619213144067',
437 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
438 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
439 'uploader': 'عالم الأخبار',
440 'uploader_id': 'news_al3alm',
442 'timestamp': 1492000653,
443 'upload_date': '20170412',
445 'skip': 'Account suspended',
447 'url': 'https://twitter.com/i/web/status/910031516746514432',
449 'id': '910030238373089285',
450 'display_id': '910031516746514432',
452 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
453 'thumbnail': r
're:^https?://.*\.jpg',
454 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
455 'uploader': 'Préfet de Guadeloupe',
456 'uploader_id': 'Prefet971',
458 'timestamp': 1505803395,
459 'upload_date': '20170919',
460 'uploader_url': 'https://twitter.com/Prefet971',
461 'comment_count': int,
469 'skip_download': True, # requires ffmpeg
472 # card via api.twitter.com/1.1/videos/tweet/config
473 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
475 'id': '1001551417340022785',
476 'display_id': '1001551623938805763',
478 'title': 're:.*?Shep is on a roll today.*?',
479 'thumbnail': r
're:^https?://.*\.jpg',
480 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
481 'uploader': 'Lis Power',
482 'uploader_id': 'LisPower1',
484 'timestamp': 1527623489,
485 'upload_date': '20180529',
486 'uploader_url': 'https://twitter.com/LisPower1',
487 'comment_count': int,
495 'skip_download': True, # requires ffmpeg
498 'url': 'https://twitter.com/foobar/status/1087791357756956680',
500 'id': '1087791272830607360',
501 'display_id': '1087791357756956680',
503 'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
504 'thumbnail': r
're:^https?://.*\.jpg',
505 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
506 'uploader': 'Twitter',
507 'uploader_id': 'Twitter',
509 'timestamp': 1548184644,
510 'upload_date': '20190122',
511 'uploader_url': 'https://twitter.com/Twitter',
512 'comment_count': int,
520 # not available in Periscope
521 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
523 'id': '1vOGwqejwoWxB',
525 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
527 'uploader_id': '1eVjYOLGkGrQL',
528 'thumbnail': r
're:^https?://.*\.jpg',
529 'tags': ['EduTECH2019'],
532 'add_ie': ['TwitterBroadcast'],
535 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
537 'id': '1349774757969989634',
538 'display_id': '1349794411333394432',
540 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
541 'thumbnail': r
're:^https?://.*\.jpg',
542 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
543 'uploader': 'Brooklyn Nets',
544 'uploader_id': 'BrooklynNets',
546 'timestamp': 1610651040,
547 'upload_date': '20210114',
548 'uploader_url': 'https://twitter.com/BrooklynNets',
549 'comment_count': int,
556 'skip_download': True,
559 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
561 'id': '1577855447914409984',
562 'display_id': '1577855540407197696',
564 'title': 'md5:9d198efb93557b8f8d5b78c480407214',
565 'description': 'md5:b9c3699335447391d11753ab21c70a74',
566 'upload_date': '20221006',
567 'uploader': 'oshtru',
568 'uploader_id': 'oshtru',
569 'uploader_url': 'https://twitter.com/oshtru',
570 'thumbnail': r
're:^https?://.*\.jpg',
572 'timestamp': 1665025050,
573 'comment_count': int,
580 'params': {'skip_download': True}
,
582 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
584 'id': '1577719286659006464',
585 'title': 'Ultima | #\u0432\u029f\u043c - Test',
586 'description': 'Test https://t.co/Y3KEZD7Dad',
587 'uploader': 'Ultima | #\u0432\u029f\u043c',
588 'uploader_id': 'UltimaShadowX',
589 'uploader_url': 'https://twitter.com/UltimaShadowX',
590 'upload_date': '20221005',
591 'timestamp': 1664992565,
592 'comment_count': int,
599 'params': {'skip_download': True}
,
601 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
603 'id': '1575559336759263233',
604 'display_id': '1575560063510810624',
606 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
607 'thumbnail': r
're:^https?://.*\.jpg',
608 'description': 'md5:95aea692fda36a12081b9629b02daa92',
609 'uploader': 'Max Olson',
610 'uploader_id': 'MesoMax919',
611 'uploader_url': 'https://twitter.com/MesoMax919',
613 'timestamp': 1664477766,
614 'upload_date': '20220929',
615 'comment_count': int,
619 'tags': ['HurricaneIan'],
623 # Adult content, fails if not logged in (GraphQL)
624 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
626 'id': '1575199163847000068',
627 'display_id': '1575199173472927762',
632 'uploader_id': 'Rizdraws',
633 'uploader_url': 'https://twitter.com/Rizdraws',
634 'upload_date': '20220928',
635 'timestamp': 1664391723,
636 'thumbnail': r
're:^https?://.+\.jpg',
639 'comment_count': int,
643 'skip': 'Requires authentication',
645 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
646 'playlist_mincount': 2,
648 'id': '1395079556562706435',
653 'upload_date': '20210519',
656 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
657 'uploader_id': 'Srirachachau',
658 'comment_count': int,
659 'uploader_url': 'https://twitter.com/Srirachachau',
660 'timestamp': 1621447860,
663 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
664 'playlist_mincount': 2,
666 'id': '1578353380363501568',
668 'uploader_id': 'DavidToons_',
672 'timestamp': 1665143744,
673 'uploader_url': 'https://twitter.com/DavidToons_',
674 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
676 'comment_count': int,
677 'upload_date': '20221007',
681 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
684 'id': '1578401165338976258',
686 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
688 'uploader_id': 'primevideouk',
689 'timestamp': 1665155137,
690 'upload_date': '20221007',
692 'uploader_url': 'https://twitter.com/primevideouk',
693 'comment_count': int,
696 'tags': ['TheRingsOfPower'],
700 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
702 'id': '1lPJqmBeeNAJb',
704 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
705 'uploader': r
're:Monique Camarra.+?',
706 'uploader_id': 'MoniqueCamarra',
707 'live_status': 'was_live',
708 'description': 'md5:acce559345fd49f129c20dbcda3f1201',
709 'timestamp': 1658407771464,
711 'add_ie': ['TwitterSpaces'],
712 'params': {'skip_download': 'm3u8'}
,
714 # URL specifies video number but --yes-playlist
715 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
716 'playlist_mincount': 2,
718 'id': '1600649710662213632',
719 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
720 'timestamp': 1670459604.0,
721 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
722 'comment_count': int,
723 'uploader_id': 'CTVJLaidlaw',
725 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
726 'upload_date': '20221208',
728 'uploader': 'Jocelyn Laidlaw',
729 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
733 # URL specifies video number and --no-playlist
734 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
736 'id': '1600649511827013632',
738 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
739 'thumbnail': r
're:^https?://.+\.jpg',
740 'timestamp': 1670459604.0,
741 'uploader_id': 'CTVJLaidlaw',
742 'uploader': 'Jocelyn Laidlaw',
744 'comment_count': int,
745 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
747 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
748 'display_id': '1600649710662213632',
751 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
752 'upload_date': '20221208',
755 'params': {'noplaylist': True}
,
757 # id points to a TweetWithVisibilityResults entity that wraps the actual Tweet
758 # note that the extracted id differs from the id in the URL
759 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
761 'id': '1621117577354424321',
762 'display_id': '1621117700482416640',
764 'title': '뽀 - 아 최우제 이동속도 봐',
765 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
768 'uploader_id': 's2FAKER',
769 'uploader_url': 'https://twitter.com/s2FAKER',
770 'upload_date': '20230202',
771 'timestamp': 1675339553.0,
772 'thumbnail': r
're:https?://pbs\.twimg\.com/.+',
777 'comment_count': int,
781 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
783 'id': '1599108643743473680',
784 'display_id': '1599108751385972737',
786 'title': '\u06ea - \U0001F48B',
787 'uploader_url': 'https://twitter.com/hlo_again',
789 'uploader_id': 'hlo_again',
790 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
793 'comment_count': int,
795 'upload_date': '20221203',
797 'timestamp': 1670092210.0,
799 'uploader': '\u06ea',
800 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
802 'params': {'noplaylist': True}
,
804 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
806 'id': '1600009362759733248',
807 'display_id': '1600009574919962625',
809 'uploader_url': 'https://twitter.com/MunTheShinobi',
810 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
812 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
814 'uploader': 'Mün The Shinobi',
816 'upload_date': '20221206',
817 'title': 'Mün The Shinobi - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
818 'comment_count': int,
821 'uploader_id': 'MunTheShinobi',
823 'timestamp': 1670306984.0,
826 # url to retweet id, legacy API
827 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
829 'id': '1623274794488659969',
830 'display_id': '1623739803874349067',
832 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
833 'description': 'md5:e873616a4a8fe0f93e71872678a672f3',
834 'uploader': 'Johnny Bullets',
835 'uploader_id': 'Johnnybull3ts',
836 'uploader_url': 'https://twitter.com/Johnnybull3ts',
840 'timestamp': 1675853859.0,
841 'upload_date': '20230208',
842 'thumbnail': r
're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
845 'comment_count': int,
847 'params': {'extractor_args': {'twitter': {'legacy_api': ['']}
}},
850 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
851 'only_matching': True,
854 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
855 'only_matching': True,
857 # promo_video_website card
858 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
859 'only_matching': True,
861 # promo_video_convo card
862 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
863 'only_matching': True,
866 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
867 'only_matching': True,
869 # video_direct_message card
870 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
871 'only_matching': True,
873 # poll2choice_video card
874 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
875 'only_matching': True,
877 # poll3choice_video card
878 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
879 'only_matching': True,
881 # poll4choice_video card
882 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
883 'only_matching': True,
# Reshape a GraphQL response into the dict layout used for legacy API statuses.
# Visible behaviour:
#   - picks, from the first instruction's entries, the entry whose entryId is
#     'tweet-<twid>' and follows it down to tweet_results -> result (optionally
#     through a nested 'tweet' key);
#   - warns once when __typename is something other than 'Tweet', 'TweetTombstone' or None;
#   - for a tombstoned result, strips a trailing '. Learn more' from the tombstone text,
#     calls raise_login_required when the text mentions 'adult content', and otherwise
#     raises an expected ExtractorError with that text;
#   - takes result['legacy'] as the status and updates it in place with the 'user',
#     'card' and 'quoted_status' dicts found in the result;
#   - rebuilds the card's binding_values as a key -> value mapping.
# NOTE(review): lines are missing from this view, including the return statement and the
# condition guarding the binding_values rewrite -- confirm against the full file.
886 def _graphql_to_legacy(self
, data
, twid
):
887 result
= traverse_obj(data
, (
888 'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
889 lambda _
, v
: v
['entryId'] == f
'tweet-{twid}', 'content', 'itemContent',
890 'tweet_results', 'result', ('tweet', None),
891 ), expected_type
=dict, default
={}, get_all
=False)
893 if result
.get('__typename') not in ('Tweet', 'TweetTombstone', None):
894 self
.report_warning(f
'Unknown typename: {result.get("__typename")}', twid
, only_once
=True)
896 if 'tombstone' in result
:
897 cause
= remove_end(traverse_obj(result
, ('tombstone', 'text', 'text', {str}
)), '. Learn more')
898 if cause
and 'adult content' in cause
:
899 self
.raise_login_required(cause
)
900 raise ExtractorError(f
'Twitter API says: {cause or "Unknown error"}', expected
=True)
902 status
= result
.get('legacy', {})
903 status
.update(traverse_obj(result
, {
904 'user': ('core', 'user_results', 'result', 'legacy'),
905 'card': ('card', 'legacy'),
906 'quoted_status': ('quoted_status_result', 'result', 'legacy'),
907 }, expected_type
=dict, default
={}))
909 # extra transformation is needed since result does not match legacy format
911 binding_value
.get('key'): binding_value
.get('value')
912 for binding_value
in traverse_obj(status
, ('card', 'binding_values', ..., {dict}
))
915 status
['card']['binding_values'] = binding_values
# Supply the GraphQL request parameters for looking up the tweet identified by media_id.
# Visible content: 'focalTweetId' is set to media_id, followed by a fixed list of
# boolean request options and feature switches.
# NOTE(review): the lines that open and close the enclosing dict(s) and the return
# statement are missing from this view, so how these entries are grouped cannot be
# confirmed here -- check the full file.
919 def _build_graphql_query(self
, media_id
):
922 'focalTweetId': media_id
,
923 'includePromotedContent': True,
924 'with_rux_injections': False,
925 'withBirdwatchNotes': True,
926 'withCommunity': True,
927 'withDownvotePerspective': False,
928 'withQuickPromoteEligibilityTweetFields': True,
929 'withReactionsMetadata': False,
930 'withReactionsPerspective': False,
931 'withSuperFollowsTweetFields': True,
932 'withSuperFollowsUserFields': True,
933 'withV2Timeline': True,
937 'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
938 'interactive_text_enabled': True,
939 'responsive_web_edit_tweet_api_enabled': True,
940 'responsive_web_enhance_cards_enabled': True,
941 'responsive_web_graphql_timeline_navigation_enabled': False,
942 'responsive_web_text_conversations_enabled': False,
943 'responsive_web_uc_gql_enabled': True,
944 'standardized_nudges_misinfo': True,
945 'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
946 'tweetypie_unmention_optimization_enabled': True,
947 'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
948 'verified_phone_label_enabled': False,
949 'vibe_api_enabled': True,
953 def _real_extract(self
, url
):
954 twid
, selected_index
= self
._match
_valid
_url
(url
).group('id', 'index')
955 if self
._configuration
_arg
('legacy_api') and not self
.is_logged_in
:
956 status
= traverse_obj(self
._call
_api
(f
'statuses/show/{twid}.json', twid
, {
957 'cards_platform': 'Web-12',
959 'include_reply_count': 1,
960 'include_user_entities': 0,
961 'tweet_mode': 'extended',
962 }), 'retweeted_status', None)
964 result
= self
._call
_graphql
_api
('zZXycP0V6H7m-2r0mOnFcA/TweetDetail', twid
)
965 status
= self
._graphql
_to
_legacy
(result
, twid
)
967 title
= description
= status
['full_text'].replace('\n', ' ')
968 # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
969 title
= re
.sub(r
'\s+(https?://[^ ]+)', '', title
)
970 user
= status
.get('user') or {}
971 uploader
= user
.get('name')
973 title
= f
'{uploader} - {title}'
974 uploader_id
= user
.get('screen_name')
979 'description': description
,
980 'uploader': uploader
,
981 'timestamp': unified_timestamp(status
.get('created_at')),
982 'uploader_id': uploader_id
,
983 'uploader_url': format_field(uploader_id
, None, 'https://twitter.com/%s'),
984 'like_count': int_or_none(status
.get('favorite_count')),
985 'repost_count': int_or_none(status
.get('retweet_count')),
986 'comment_count': int_or_none(status
.get('reply_count')),
987 'age_limit': 18 if status
.get('possibly_sensitive') else 0,
988 'tags': traverse_obj(status
, ('entities', 'hashtags', ..., 'text')),
991 def extract_from_video_info(media
):
992 media_id
= traverse_obj(media
, 'id_str', 'id', expected_type
=str_or_none
)
993 self
.write_debug(f
'Extracting from video info: {media_id}')
994 video_info
= media
.get('video_info') or {}
998 for variant
in video_info
.get('variants', []):
999 fmts
, subs
= self
._extract
_variant
_formats
(variant
, twid
)
1000 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
1001 formats
.extend(fmts
)
1004 media_url
= media
.get('media_url_https') or media
.get('media_url')
1006 def add_thumbnail(name
, size
):
1009 'url': update_url_query(media_url
, {'name': name}
),
1010 'width': int_or_none(size
.get('w') or size
.get('width')),
1011 'height': int_or_none(size
.get('h') or size
.get('height')),
1013 for name
, size
in media
.get('sizes', {}).items():
1014 add_thumbnail(name
, size
)
1015 add_thumbnail('orig', media
.get('original_info') or {})
1020 'subtitles': subtitles
,
1021 'thumbnails': thumbnails
,
1022 'view_count': traverse_obj(media
, ('mediaStats', 'viewCount', {int_or_none}
)),
1023 'duration': float_or_none(video_info
.get('duration_millis'), 1000),
1024 # The codecs of http formats are unknown
1025 '_format_sort_fields': ('res', 'br', 'size', 'proto'),
1028 def extract_from_card_info(card
):
1032 self
.write_debug(f
'Extracting from card info: {card.get("url")}')
1033 binding_values
= card
['binding_values']
1035 def get_binding_value(k
):
1036 o
= binding_values
.get(k
) or {}
1037 return try_get(o
, lambda x
: x
[x
['type'].lower() + '_value'])
1039 card_name
= card
['name'].split(':')[-1]
1040 if card_name
== 'player':
1043 'url': get_binding_value('player_url'),
1045 elif card_name
== 'periscope_broadcast':
1048 'url': get_binding_value('url') or get_binding_value('player_url'),
1049 'ie_key': PeriscopeIE
.ie_key(),
1051 elif card_name
== 'broadcast':
1054 'url': get_binding_value('broadcast_url'),
1055 'ie_key': TwitterBroadcastIE
.ie_key(),
1057 elif card_name
== 'audiospace':
1060 'url': f
'https://twitter.com/i/spaces/{get_binding_value("id")}',
1061 'ie_key': TwitterSpacesIE
.ie_key(),
1063 elif card_name
== 'summary':
1066 'url': get_binding_value('card_url'),
1068 elif card_name
== 'unified_card':
1069 unified_card
= self
._parse
_json
(get_binding_value('unified_card'), twid
)
1070 yield from map(extract_from_video_info
, traverse_obj(
1071 unified_card
, ('media_entities', ...), expected_type
=dict))
1072 # amplify, promo_video_website, promo_video_convo, appplayer,
1073 # video_direct_message, poll2choice_video, poll3choice_video,
1074 # poll4choice_video, ...
1076 is_amplify
= card_name
== 'amplify'
1077 vmap_url
= get_binding_value('amplify_url_vmap') if is_amplify
else get_binding_value('player_stream_url')
1078 content_id
= get_binding_value('%s_content_id' % (card_name
if is_amplify
else 'player'))
1079 formats
, subtitles
= self
._extract
_formats
_from
_vmap
_url
(vmap_url
, content_id
or twid
)
1082 for suffix
in ('_small', '', '_large', '_x_large', '_original'):
1083 image
= get_binding_value('player_image' + suffix
) or {}
1084 image_url
= image
.get('url')
1085 if not image_url
or '/player-placeholder' in image_url
:
1088 'id': suffix
[1:] if suffix
else 'medium',
1090 'width': int_or_none(image
.get('width')),
1091 'height': int_or_none(image
.get('height')),
1096 'subtitles': subtitles
,
1097 'thumbnails': thumbnails
,
1098 'duration': int_or_none(get_binding_value(
1099 'content_duration_seconds')),
1102 videos
= traverse_obj(status
, (
1103 (None, 'quoted_status'), 'extended_entities', 'media', lambda _
, m
: m
['type'] != 'photo', {dict}
))
1105 if self
._yes
_playlist
(twid
, selected_index
, video_label
='URL-specified video number'):
1106 selected_entries
= (*map(extract_from_video_info
, videos
), *extract_from_card_info(status
.get('card')))
1108 desired_obj
= traverse_obj(status
, ('extended_entities', 'media', int(selected_index
) - 1, {dict}
))
1110 raise ExtractorError(f
'Video #{selected_index} is unavailable', expected
=True)
1111 elif desired_obj
.get('type') != 'video':
1112 raise ExtractorError(f
'Media #{selected_index} is not a video', expected
=True)
1114 # Restore original archive id and video index in title
1115 for index
, entry
in enumerate(videos
, 1):
1116 if entry
.get('id') != desired_obj
.get('id'):
1119 info
['_old_archive_ids'] = [make_archive_id(self
, twid
)]
1120 if len(videos
) != 1:
1121 info
['title'] += f
' #{index}'
1124 return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}
1126 entries
= [{**info, **data, 'display_id': twid}
for data
in selected_entries
]
1128 expanded_url
= traverse_obj(status
, ('entities', 'urls', 0, 'expanded_url'), expected_type
=url_or_none
)
1129 if not expanded_url
or expanded_url
== url
:
1130 self
.raise_no_formats('No video could be found in this tweet', expected
=True)
1133 return self
.url_result(expanded_url
, display_id
=twid
, **info
)
1135 entries
[0]['_old_archive_ids'] = [make_archive_id(self
, twid
)]
1137 if len(entries
) == 1:
1140 for index
, entry
in enumerate(entries
, 1):
1141 entry
['title'] += f
' #{index}'
1143 return self
.playlist_result(entries
, **info
)
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for standalone Amplify video pages served from amp.twimg.com.

    Everything is read from ``twitter:*`` meta tags of the page: the VMAP
    document URL (which lists the video variants), the preview image and the
    image/player dimensions.
    """
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Build the info dict for an Amplify page.

        Returns a dict with ``id``, ``title``, ``formats`` and ``thumbnails``.
        Subtitles reported by the VMAP document are discarded, as before.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        def _find_dimension(target):
            # Read the `twitter:<target>:width/height` meta tags; either
            # value is None when the tag is absent or not numeric.
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)
        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # The VMAP lookup above is non-fatal, so the format list may be empty;
        # indexing it unconditionally would raise IndexError instead of
        # letting the regular "no formats" handling report the problem.
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``/i/broadcasts/<id>`` pages.

    Combines the API helpers of ``TwitterBaseIE`` with the broadcast parsing
    and m3u8 helpers inherited from ``PeriscopeBaseIE``.
    """
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Resolve a broadcast URL to an info dict including its HLS formats."""
        broadcast_id = self._match_id(url)

        # Broadcast metadata is keyed by the broadcast id in the response.
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        # The playable stream is looked up separately through the media key.
        media_key = broadcast['media_key']
        stream_status = self._call_api(
            f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']

        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The `type` query parameter of the playlist URL, when present,
        # is used as the m3u8 format id.
        playlist_query = compat_parse_qs(
            compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
class TwitterSpacesIE(TwitterBaseIE):
    """Extractor for audio Spaces at ``/i/spaces/<id>``.

    Space metadata comes from the ``AudioSpaceById`` GraphQL query; the audio
    playlist is resolved through the ``live_video_stream/status`` endpoint.
    """
    IE_NAME = 'twitter:spaces'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
        'info_dict': {
            'id': '1RDxlgyvNXzJL',
            'ext': 'm4a',
            'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
            'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
            'uploader': r're:Lucio Di Gaetano.*?',
            'uploader_id': 'luciodigaetano',
            'live_status': 'was_live',
            # seconds since the epoch (the API reports milliseconds)
            'timestamp': 1659877956,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Maps the lower-cased `state` reported for a Space to a live_status value.
    SPACE_STATUS = {
        'notstarted': 'is_upcoming',
        'ended': 'was_live',
        'running': 'is_live',
        'timedout': 'post_live',
    }

    def _build_graphql_query(self, space_id):
        """Return the variables/features payload for the AudioSpaceById query."""
        return {
            'variables': {
                'id': space_id,
                'isMetatagsQuery': True,
                'withDownvotePerspective': False,
                'withReactionsMetadata': False,
                'withReactionsPerspective': False,
                'withReplays': True,
                'withSuperFollowsUserFields': True,
                'withSuperFollowsTweetFields': True,
            },
            'features': {
                'dont_mention_me_view_api_enabled': True,
                'interactive_text_enabled': True,
                'responsive_web_edit_tweet_api_enabled': True,
                'responsive_web_enhance_cards_enabled': True,
                'responsive_web_uc_gql_enabled': True,
                'spaces_2022_h2_clipping': True,
                'spaces_2022_h2_spaces_communities': False,
                'standardized_nudges_misinfo': True,
                'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
                'vibe_api_enabled': True,
            },
        }

    def _real_extract(self, url):
        """Extract a Space.

        Raises ExtractorError (expected) when the API returns no Space data.
        Spaces that have not started yet, or that ended but are not yet
        downloadable, are reported via ``raise_no_formats`` and yield an empty
        format list.
        """
        space_id = self._match_id(url)
        space_data = self._call_graphql_api('HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
        if not space_data:
            raise ExtractorError('Twitter Space not found', expected=True)

        metadata = space_data['metadata']
        # Unknown or missing states leave live_status as None.
        live_status = try_call(lambda: self.SPACE_STATUS[metadata['state'].lower()])

        formats = []
        if live_status == 'is_upcoming':
            self.raise_no_formats('Twitter Space not started yet', expected=True)
        elif live_status == 'post_live':
            self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)
        else:
            source = self._call_api(
                f'live_video_stream/status/{metadata["media_key"]}', metadata['media_key'])['source']

            # XXX: Native downloader does not work
            formats = self._extract_m3u8_formats(
                traverse_obj(source, 'noRedirectPlaybackUrl', 'location'),
                metadata['media_key'], 'm4a', 'm3u8', live=live_status == 'is_live',
                headers={'Referer': 'https://twitter.com/'})
            # Spaces are audio-only streams.
            for fmt in formats:
                fmt.update({'vcodec': 'none', 'acodec': 'aac'})

        participants = ', '.join(traverse_obj(
            space_data, ('participants', 'speakers', ..., 'display_name'))) or 'nobody yet'
        return {
            'id': space_id,
            'title': metadata.get('title'),
            'description': f'Twitter Space participated by {participants}',
            'uploader': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'name')),
            'uploader_id': traverse_obj(
                metadata, ('creator_results', 'result', 'legacy', 'screen_name')),
            'live_status': live_status,
            # `created_at` is in milliseconds (e.g. 1659877956397); a UNIX
            # timestamp is expected in seconds, so scale it down.
            'timestamp': int_or_none(metadata.get('created_at'), 1000),
            'formats': formats,
        }
class TwitterShortenerIE(TwitterBaseIE):
    """Resolve ``t.co`` short links (or ``tco:<id>`` pseudo-URLs) to their target."""
    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host `t.co` matches.
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and delegate to whatever URL it redirects to."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # `tco:<id>` form: build the real short URL from the bare id.
            short_id = eid
            url = self._BASE_URL + short_id
        # NOTE(review): the curl User-Agent presumably makes the shortener
        # answer with a plain HTTP redirect - confirm before changing it.
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading interstitial prefix, never later
            # occurrences inside the target URL itself.
            # NOTE(review): the remaining value may be percent-encoded - verify
            # against a live unsafe-link redirect.
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)