]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/twitter.py
[Theta] Fix valid URL (#2323)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import (
8 compat_HTTPError,
9 compat_parse_qs,
10 compat_urllib_parse_unquote,
11 compat_urllib_parse_urlparse,
12 )
13 from ..utils import (
14 dict_get,
15 ExtractorError,
16 float_or_none,
17 int_or_none,
18 traverse_obj,
19 try_get,
20 strip_or_none,
21 unified_timestamp,
22 update_url_query,
23 url_or_none,
24 xpath_text,
25 )
26
27 from .periscope import (
28 PeriscopeBaseIE,
29 PeriscopeIE,
30 )
31
32
class TwitterBaseIE(InfoExtractor):
    """Helpers shared by the Twitter extractors: variant/VMAP format parsing and API access."""

    _API_BASE = 'https://api.twitter.com/1.1/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?twitter\.com/'
    # Guest token returned by guest/activate.json; fetched on first use in _call_api
    _GUEST_TOKEN = None

    def _extract_variant_formats(self, variant, video_id):
        """Turn a single media variant into a ``(formats, subtitles)`` tuple.

        ``variant`` is a mapping with a ``url`` key and optionally ``bitrate`` or
        ``bit_rate``. A variant without a URL yields ``([], {})``; an HLS
        playlist is expanded via the m3u8 helper (non-fatal); anything else is
        treated as a direct HTTP format.
        """
        variant_url = variant.get('url')
        if not variant_url:
            return [], {}
        elif '.m3u8' in variant_url:
            return self._extract_m3u8_formats_and_subtitles(
                variant_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False)
        else:
            # The bitrate is scaled down by 1000; a zero/missing result becomes None
            tbr = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            f = {
                'url': variant_url,
                'format_id': 'http' + ('-%d' % tbr if tbr else ''),
                'tbr': tbr,
            }
            self._search_dimensions_in_video_url(f, variant_url)
            return [f], {}

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and return ``(formats, subtitles)`` for it.

        Returns ``([], {})`` when ``vmap_url`` is not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}
        vmap_data = self._download_xml(vmap_url, video_id)
        formats = []
        subtitles = {}
        urls = []
        for video_variant in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded in the document; decode it in place
            video_variant.attrib['url'] = compat_urllib_parse_unquote(
                video_variant.attrib['url'])
            urls.append(video_variant.attrib['url'])
            fmts, subs = self._extract_variant_formats(
                video_variant.attrib, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        # Also pick up the MediaFile URL unless a variant already covered it
        video_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if video_url not in urls:
            fmts, subs = self._extract_variant_formats({'url': video_url}, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set ``width``/``height`` on ``a_format`` from a ``/<W>x<H>/`` path segment, if present."""
        m = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if m:
            a_format.update({
                'width': int(m.group('width')),
                'height': int(m.group('height')),
            })

    def _call_api(self, path, video_id, query=None):
        """Call ``_API_BASE + path`` with a guest token and return the decoded JSON.

        ``query`` is an optional mapping of query-string parameters.

        Raises an expected ExtractorError carrying the API's own message when a
        403 response includes one; any other failure is re-raised unchanged.
        """
        headers = {
            'Authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h%2F40K4moUkGsoc%3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw',
        }
        if not self._GUEST_TOKEN:
            self._GUEST_TOKEN = self._download_json(
                self._API_BASE + 'guest/activate.json', video_id,
                'Downloading guest token', data=b'',
                headers=headers)['guest_token']
        headers['x-guest-token'] = self._GUEST_TOKEN
        try:
            return self._download_json(
                self._API_BASE + path, video_id, headers=headers, query=query or {})
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                # Parse leniently: a 403 body that is not the usual error JSON must
                # not hide the original HTTP failure behind a parsing error
                error = self._parse_json(
                    e.cause.read().decode(), video_id, fatal=False)
                message = try_get(error, lambda x: x['errors'][0]['message'])
                if message:
                    raise ExtractorError(message, expected=True)
            raise
107
108
class TwitterCardIE(InfoExtractor):
    """Matches card / embedded-video URLs and hands the tweet ID over to TwitterIE."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
        # MD5 checksums are different in different places
        'info_dict': {
            'id': '560070183650213889',
            'ext': 'mp4',
            'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
            'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 30.033,
            'timestamp': 1422366112,
            'upload_date': '20150127',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
        'md5': '7137eca597f72b9abbe61e5ae0161399',
        'info_dict': {
            'id': '623160978427936768',
            'ext': 'mp4',
            'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
            'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
            'uploader': 'NASA',
            'uploader_id': 'NASA',
            'timestamp': 1437408129,
            'upload_date': '20150720',
        },
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
        'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
        'info_dict': {
            'id': 'dq4Oj5quskI',
            'ext': 'mp4',
            'title': 'Ubuntu 11.10 Overview',
            'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
            'upload_date': '20111013',
            'uploader': 'OMG! UBUNTU!',
            'uploader_id': 'omgubuntu',
        },
        'add_ie': ['Youtube'],
    }, {
        'url': 'https://twitter.com/i/cards/tfw/v1/665289828897005568',
        'md5': '6dabeaca9e68cbb71c99c322a4b42a11',
        'info_dict': {
            'id': 'iBb2x00UVlv',
            'ext': 'mp4',
            'upload_date': '20151113',
            'uploader_id': '1189339351084113920',
            'uploader': 'ArsenalTerje',
            'title': 'Vine by ArsenalTerje',
            'timestamp': 1447451307,
        },
        'add_ie': ['Vine'],
    }, {
        'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
        'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader': 'Brent Yarina',
            'uploader_id': 'BTNBrentYarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'skip': 'This content is no longer available.',
    }, {
        'url': 'https://twitter.com/i/videos/752274308186120192',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect to the ``statuses/<id>`` URL of the same tweet, handled by TwitterIE."""
        tweet_id = self._match_id(url)
        status_url = 'https://twitter.com/statuses/' + tweet_id
        target_ie = TwitterIE.ie_key()
        return self.url_result(status_url, target_ie, tweet_id)
195
196
class TwitterIE(TwitterBaseIE):
    """Extractor for individual tweets (status URLs)."""

    IE_NAME = 'twitter'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://twitter.com/freethenipple/status/643211948184596480',
        'info_dict': {
            'id': '643211948184596480',
            'ext': 'mp4',
            'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
            'uploader': 'FREE THE NIPPLE',
            'uploader_id': 'freethenipple',
            'duration': 12.922,
            'timestamp': 1442188653,
            'upload_date': '20150913',
            'age_limit': 18,
        },
    }, {
        'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
        'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
        'info_dict': {
            'id': '657991469417025536',
            'ext': 'mp4',
            'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
            'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
            'thumbnail': r're:^https?://.*\.png',
            'uploader': 'Gifs',
            'uploader_id': 'giphz',
        },
        'expected_warnings': ['height', 'width'],
        'skip': 'Account suspended',
    }, {
        'url': 'https://twitter.com/starwars/status/665052190608723968',
        'info_dict': {
            'id': '665052190608723968',
            'ext': 'mp4',
            'title': 'Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens.',
            'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
            'uploader_id': 'starwars',
            'uploader': 'Star Wars',
            'timestamp': 1447395772,
            'upload_date': '20151113',
        },
    }, {
        'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
        'info_dict': {
            'id': '705235433198714880',
            'ext': 'mp4',
            'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
            'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
            'uploader_id': 'BTNBrentYarina',
            'uploader': 'Brent Yarina',
            'timestamp': 1456976204,
            'upload_date': '20160303',
        },
        'params': {
            # The same video as https://twitter.com/i/videos/tweet/705235433198714880
            # Test case of TwitterCardIE
            'skip_download': True,
        },
    }, {
        'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
        'info_dict': {
            'id': '700207533655363584',
            'ext': 'mp4',
            'title': 'simon vertugo - BEAT PROD: @suhmeduh #Damndaniel',
            'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
            'thumbnail': r're:^https?://.*\.jpg',
            'uploader': 'simon vertugo',
            'uploader_id': 'simonvertugo',
            'duration': 30.0,
            'timestamp': 1455777459,
            'upload_date': '20160218',
        },
    }, {
        'url': 'https://twitter.com/Filmdrunk/status/713801302971588609',
        'md5': '89a15ed345d13b86e9a5a5e051fa308a',
        'info_dict': {
            'id': 'MIOxnrUteUd',
            'ext': 'mp4',
            'title': 'Dr.Pepperの飲み方 #japanese #バカ #ドクペ #電動ガン',
            'uploader': 'TAKUMA',
            'uploader_id': '1004126642786242560',
            'timestamp': 1402826626,
            'upload_date': '20140615',
        },
        'add_ie': ['Vine'],
    }, {
        'url': 'https://twitter.com/captainamerica/status/719944021058060289',
        'info_dict': {
            'id': '719944021058060289',
            'ext': 'mp4',
            'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
            'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
            'uploader_id': 'CaptainAmerica',
            'uploader': 'Captain America',
            'duration': 3.17,
            'timestamp': 1460483005,
            'upload_date': '20160412',
        },
    }, {
        'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
        'info_dict': {
            'id': '1zqKVVlkqLaKB',
            'ext': 'mp4',
            'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
            'upload_date': '20160923',
            'uploader_id': '1PmKqpJdOJQoY',
            'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
            'timestamp': 1474613214,
        },
        'add_ie': ['Periscope'],
    }, {
        # has mp4 formats via mobile API
        'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
        'info_dict': {
            'id': '852138619213144067',
            'ext': 'mp4',
            'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
            'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
            'uploader': 'عالم الأخبار',
            'uploader_id': 'news_al3alm',
            'duration': 277.4,
            'timestamp': 1492000653,
            'upload_date': '20170412',
        },
        'skip': 'Account suspended',
    }, {
        'url': 'https://twitter.com/i/web/status/910031516746514432',
        'info_dict': {
            'id': '910031516746514432',
            'ext': 'mp4',
            'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
            'uploader': 'Préfet de Guadeloupe',
            'uploader_id': 'Prefet971',
            'duration': 47.48,
            'timestamp': 1505803395,
            'upload_date': '20170919',
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
    }, {
        # card via api.twitter.com/1.1/videos/tweet/config
        'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
        'info_dict': {
            'id': '1001551623938805763',
            'ext': 'mp4',
            'title': 're:.*?Shep is on a roll today.*?',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
            'uploader': 'Lis Power',
            'uploader_id': 'LisPower1',
            'duration': 111.278,
            'timestamp': 1527623489,
            'upload_date': '20180529',
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
        },
    }, {
        'url': 'https://twitter.com/foobar/status/1087791357756956680',
        'info_dict': {
            'id': '1087791357756956680',
            'ext': 'mp4',
            'title': 'Twitter - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
            'uploader': 'Twitter',
            'uploader_id': 'Twitter',
            'duration': 61.567,
            'timestamp': 1548184644,
            'upload_date': '20190122',
        },
    }, {
        # not available in Periscope
        'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
        'info_dict': {
            'id': '1vOGwqejwoWxB',
            'ext': 'mp4',
            'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
            'uploader': 'Vivi',
            'uploader_id': '1eVjYOLGkGrQL',
        },
        'add_ie': ['TwitterBroadcast'],
    }, {
        # unified card
        'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
        'info_dict': {
            'id': '1349794411333394432',
            'ext': 'mp4',
            'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
            'uploader': 'Brooklyn Nets',
            'uploader_id': 'BrooklynNets',
            'duration': 324.484,
            'timestamp': 1610651040,
            'upload_date': '20210114',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Twitch Clip Embed
        'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
        'only_matching': True,
    }, {
        # promo_video_website card
        'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
        'only_matching': True,
    }, {
        # promo_video_convo card
        'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
        'only_matching': True,
    }, {
        # appplayer card
        'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
        'only_matching': True,
    }, {
        # video_direct_message card
        'url': 'https://twitter.com/qarev001/status/1348948114569269251',
        'only_matching': True,
    }, {
        # poll2choice_video card
        'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
        'only_matching': True,
    }, {
        # poll3choice_video card
        'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
        'only_matching': True,
    }, {
        # poll4choice_video card
        'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the tweet through ``statuses/show`` and build its info dict.

        The media source is chosen in this order: native media attached to the
        tweet (or to its quoted tweet), then the tweet's card, then the first
        expanded URL in the tweet text (returned as a ``url`` result).

        Raises an expected ExtractorError when none of these is available.
        """
        twid = self._match_id(url)
        status = self._call_api(
            'statuses/show/%s.json' % twid, twid, {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            })

        title = description = status['full_text'].replace('\n', ' ')
        # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
        title = re.sub(r'\s+(https?://[^ ]+)', '', title)
        user = status.get('user') or {}
        uploader = user.get('name')
        if uploader:
            title = '%s - %s' % (uploader, title)
        uploader_id = user.get('screen_name')

        # Keep only hashtags that actually carry text
        hashtags = try_get(status, lambda x: x['entities']['hashtags'], list) or []
        tags = [hashtag['text'] for hashtag in hashtags if hashtag.get('text')]

        info = {
            'id': twid,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': unified_timestamp(status.get('created_at')),
            'uploader_id': uploader_id,
            'uploader_url': 'https://twitter.com/' + uploader_id if uploader_id else None,
            'like_count': int_or_none(status.get('favorite_count')),
            'repost_count': int_or_none(status.get('retweet_count')),
            'comment_count': int_or_none(status.get('reply_count')),
            'age_limit': 18 if status.get('possibly_sensitive') else 0,
            'tags': tags,
        }

        def extract_from_video_info(media):
            """Fill ``info`` with formats, subtitles, thumbnails and duration of a media entity."""
            video_info = media.get('video_info') or {}

            formats = []
            subtitles = {}
            for variant in video_info.get('variants', []):
                fmts, subs = self._extract_variant_formats(variant, twid)
                subtitles = self._merge_subtitles(subtitles, subs)
                formats.extend(fmts)
            self._sort_formats(formats, ('res', 'br', 'size', 'proto'))  # The codec of http formats are unknown

            thumbnails = []
            media_url = media.get('media_url_https') or media.get('media_url')
            if media_url:
                def add_thumbnail(name, size):
                    # Each size is addressed by a ``name`` query parameter on the media URL
                    thumbnails.append({
                        'id': name,
                        'url': update_url_query(media_url, {'name': name}),
                        'width': int_or_none(size.get('w') or size.get('width')),
                        'height': int_or_none(size.get('h') or size.get('height')),
                    })
                for name, size in media.get('sizes', {}).items():
                    add_thumbnail(name, size)
                add_thumbnail('orig', media.get('original_info') or {})

            info.update({
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': float_or_none(video_info.get('duration_millis'), 1000),
            })

        # First media entity of the tweet itself, falling back to the quoted tweet
        media = traverse_obj(status, ((None, 'quoted_status'), 'extended_entities', 'media', 0), get_all=False)
        if media and media.get('type') != 'photo':
            extract_from_video_info(media)
        else:
            card = status.get('card')
            if card:
                binding_values = card['binding_values']

                def get_binding_value(k):
                    # The payload key is derived from the entry's own type, i.e. '<type>_value'
                    o = binding_values.get(k) or {}
                    return try_get(o, lambda x: x[x['type'].lower() + '_value'])

                card_name = card['name'].split(':')[-1]
                if card_name == 'player':
                    info.update({
                        '_type': 'url',
                        'url': get_binding_value('player_url'),
                    })
                elif card_name == 'periscope_broadcast':
                    info.update({
                        '_type': 'url',
                        'url': get_binding_value('url') or get_binding_value('player_url'),
                        'ie_key': PeriscopeIE.ie_key(),
                    })
                elif card_name == 'broadcast':
                    info.update({
                        '_type': 'url',
                        'url': get_binding_value('broadcast_url'),
                        'ie_key': TwitterBroadcastIE.ie_key(),
                    })
                elif card_name == 'summary':
                    info.update({
                        '_type': 'url',
                        'url': get_binding_value('card_url'),
                    })
                elif card_name == 'unified_card':
                    media_entities = self._parse_json(get_binding_value('unified_card'), twid)['media_entities']
                    extract_from_video_info(next(iter(media_entities.values())))
                # amplify, promo_video_website, promo_video_convo, appplayer,
                # video_direct_message, poll2choice_video, poll3choice_video,
                # poll4choice_video, ...
                else:
                    is_amplify = card_name == 'amplify'
                    vmap_url = get_binding_value('amplify_url_vmap') if is_amplify else get_binding_value('player_stream_url')
                    content_id = get_binding_value('%s_content_id' % (card_name if is_amplify else 'player'))
                    formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)
                    self._sort_formats(formats)

                    thumbnails = []
                    for suffix in ('_small', '', '_large', '_x_large', '_original'):
                        image = get_binding_value('player_image' + suffix) or {}
                        image_url = image.get('url')
                        if not image_url or '/player-placeholder' in image_url:
                            continue
                        thumbnails.append({
                            'id': suffix[1:] if suffix else 'medium',
                            'url': image_url,
                            'width': int_or_none(image.get('width')),
                            'height': int_or_none(image.get('height')),
                        })

                    info.update({
                        'formats': formats,
                        'subtitles': subtitles,
                        'thumbnails': thumbnails,
                        'duration': int_or_none(get_binding_value(
                            'content_duration_seconds')),
                    })
            else:
                expanded_url = try_get(status, lambda x: x['entities']['urls'][0]['expanded_url'])
                if not expanded_url:
                    # A tweet without video is a normal condition, not an extractor bug
                    raise ExtractorError("There's no video in this tweet.", expected=True)
                info.update({
                    '_type': 'url',
                    'url': expanded_url,
                })
        return info
589
590
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for ``amp.twimg.com/v/<uuid>`` pages."""

    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': '7df102d0b9fd7066b86f3159f8e81bf6',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
    }

    def _real_extract(self, url):
        """Read the VMAP URL and dimensions from the page's meta tags and build the info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url')
        # The helper returns a (formats, subtitles) pair, so it must be unpacked
        formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            """Return ``(width, height)`` from the ``twitter:<target>:*`` meta tags."""
            w = int_or_none(self._html_search_meta(
                'twitter:%s:width' % target, webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                'twitter:%s:height' % target, webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        video_w, video_h = _find_dimension('player')
        # The helper returns no formats when the VMAP URL is missing or invalid
        if formats:
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
        }
645
646
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for ``twitter.com/i/broadcasts/<id>`` pages."""

    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TEST = {
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': '1PXEdBZWpGwKe',
        },
    }

    def _real_extract(self, url):
        """Look up the broadcast, resolve its stream URL and attach the HLS formats."""
        broadcast_id = self._match_id(url)
        show_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = show_response['broadcasts'][broadcast_id]
        info = self._parse_broadcast_data(broadcast, broadcast_id)

        media_key = broadcast['media_key']
        stream_status = self._call_api(
            'live_video_stream/status/' + media_key, media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's ``type`` query parameter, when present, is used as the format id
        playlist_query = compat_parse_qs(
            compat_urllib_parse_urlparse(m3u8_url).query)
        m3u8_id = playlist_query.get('type', [None])[0]

        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
681
682
class TwitterShortenerIE(TwitterBaseIE):
    """Resolves ``t.co`` short links (or ``tco:<id>`` pseudo-URLs) to their target URL."""

    IE_NAME = 'twitter:shortener'
    # The dot is escaped so that only the literal host "t.co" matches
    _VALID_URL = r'https?://t\.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Follow the short link and return a url result for where it ends up."""
        mobj = self._match_valid_url(url)
        eid, short_id = mobj.group('eid', 'id')
        if eid:
            # ``tco:<id>`` form: rebuild the real short URL from the bare ID
            short_id = eid
            url = self._BASE_URL + short_id
        new_url = self._request_webpage(url, short_id, headers={'User-Agent': 'curl'}).geturl()
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Drop only the leading warning prefix; the remainder is the real target.
            # NOTE(review): the target may arrive percent-encoded here - confirm
            # against a live redirect whether it needs unquoting.
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)