]> jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/vimeo.py
Merge pull request #8061 from dstftw/introduce-chapter-and-series-fields
[yt-dlp.git] / youtube_dl / extractor / vimeo.py
CommitLineData
93b22c78 1# encoding: utf-8
9148eb00
PH
2from __future__ import unicode_literals
3
b3d14cbf
PH
4import json
5import re
caeefc29 6import itertools
b3d14cbf
PH
7
8from .common import InfoExtractor
8c25f81b 9from ..compat import (
1060425c 10 compat_HTTPError,
9c44d242 11 compat_urlparse,
8c25f81b
PH
12)
13from ..utils import (
852fad92 14 encode_dict,
b3d14cbf 15 ExtractorError,
9c44d242
PH
16 InAdvancePagedList,
17 int_or_none,
55b3e45b 18 RegexNotFoundError,
67dda517 19 sanitized_Request,
30965ac6 20 smuggle_url,
b3d14cbf 21 std_headers,
3c6f2450 22 unified_strdate,
9d4660ca 23 unsmuggle_url,
a980bc43 24 urlencode_postdata,
b407e173 25 unescapeHTML,
eb4f2740 26 parse_filesize,
b3d14cbf
PH
27)
28
bbafbe20 29
efb7e119
JMF
class VimeoBaseInfoExtractor(InfoExtractor):
    """Common base for the Vimeo extractors: login flow and cookie helpers."""

    _NETRC_MACHINE = 'vimeo'
    _LOGIN_REQUIRED = False
    _LOGIN_URL = 'https://vimeo.com/log_in'

    def _login(self):
        """Log in with the configured credentials, if there are any.

        Returns silently when no username is available, unless the subclass
        sets _LOGIN_REQUIRED, in which case an expected ExtractorError is
        raised.
        """
        username, password = self._get_login_info()
        if username is None:
            if not self._LOGIN_REQUIRED:
                return
            raise ExtractorError(
                'No login info available, needed for using %s.' % self.IE_NAME,
                expected=True)

        self.report_login()
        login_page = self._download_webpage(self._LOGIN_URL, None, False)
        token, vuid = self._extract_xsrft_and_vuid(login_page)
        form = {
            'action': 'login',
            'email': username,
            'password': password,
            'service': 'vimeo',
            'token': token,
        }
        request = sanitized_Request(
            self._LOGIN_URL, urlencode_postdata(encode_dict(form)))
        for header_name, header_value in (
                ('Content-Type', 'application/x-www-form-urlencoded'),
                ('Referer', self._LOGIN_URL)):
            request.add_header(header_name, header_value)
        # The vuid scraped from the login page is sent back as a cookie
        # together with the form token.
        self._set_vimeo_cookie('vuid', vuid)
        self._download_webpage(request, None, False, 'Wrong login info')

    def _extract_xsrft_and_vuid(self, webpage):
        """Return the (xsrft token, vuid) pair scraped from *webpage*."""
        token = self._search_regex(
            r'xsrft\s*[=:]\s*(?P<q>["\'])(?P<xsrft>.+?)(?P=q)',
            webpage, 'login token', group='xsrft')
        visitor_id = self._search_regex(
            r'["\']vuid["\']\s*:\s*(["\'])(?P<vuid>.+?)\1',
            webpage, 'vuid', group='vuid')
        return token, visitor_id

    def _set_vimeo_cookie(self, name, value):
        """Set cookie *name* to *value* for the vimeo.com domain."""
        self._set_cookie('vimeo.com', name, value)
68
efb7e119 69
class VimeoIE(VimeoBaseInfoExtractor):
    """Information extractor for vimeo.com.

    Handles single-video URLs on vimeo.com, vimeopro.com and
    player.vimeo.com, as described by _VALID_URL.
    """

    # _VALID_URL matches Vimeo URLs.
    # The named groups 'player' and 'pro' record which host variant was used
    # and 'id' captures the numeric video id.  The negative lookahead rejects
    # bare channel pages and album URLs.
    _VALID_URL = r'''(?x)
        https?://
        (?:(?:www|(?P<player>player))\.)?
        vimeo(?P<pro>pro)?\.com/
        (?!channels/[^/?#]+/?(?:$|[?#])|album/)
        (?:.*?/)?
        (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)?
        (?:videos?/)?
        (?P<id>[0-9]+)
        /?(?:[?&].*)?(?:[#].*)?$'''
    IE_NAME = 'vimeo'
    # Test cases: each entry gives a URL and either the expected metadata
    # ('info_dict', optionally with an 'md5') or 'only_matching'.
    _TESTS = [
        {
            'url': 'http://vimeo.com/56015672#at=0',
            'md5': '8879b6cc097e987f02484baf890129e5',
            'info_dict': {
                'id': '56015672',
                'ext': 'mp4',
                'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
                'description': 'md5:2d3305bad981a06ff79f027f19865021',
                'upload_date': '20121220',
                'uploader_id': 'user7108434',
                'uploader': 'Filippo Valsorda',
                'duration': 10,
            },
        },
        {
            'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876',
            'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82',
            'note': 'Vimeo Pro video (#1197)',
            'info_dict': {
                'id': '68093876',
                'ext': 'mp4',
                'uploader_id': 'openstreetmapus',
                'uploader': 'OpenStreetMap US',
                'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography',
                'description': 'md5:fd69a7b8d8c34a4e1d2ec2e4afd6ec30',
                'duration': 1595,
            },
        },
        {
            'url': 'http://player.vimeo.com/video/54469442',
            'md5': '619b811a4417aa4abe78dc653becf511',
            'note': 'Videos that embed the url in the player page',
            'info_dict': {
                'id': '54469442',
                'ext': 'mp4',
                'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012',
                'uploader': 'The BLN & Business of Software',
                'uploader_id': 'theblnbusinessofsoftware',
                'duration': 3610,
                'description': None,
            },
        },
        {
            'url': 'http://vimeo.com/68375962',
            'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7',
            'note': 'Video protected with password',
            'info_dict': {
                'id': '68375962',
                'ext': 'mp4',
                'title': 'youtube-dl password protected test video',
                'upload_date': '20130614',
                'uploader_id': 'user18948128',
                'uploader': 'Jaime Marquínez Ferrándiz',
                'duration': 10,
                'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people\u2026',
            },
            'params': {
                'videopassword': 'youtube-dl',
            },
        },
        {
            'url': 'http://vimeo.com/channels/keypeele/75629013',
            'md5': '2f86a05afe9d7abc0b9126d229bbe15d',
            'note': 'Video is freely available via original URL '
                    'and protected with password when accessed via http://vimeo.com/75629013',
            'info_dict': {
                'id': '75629013',
                'ext': 'mp4',
                'title': 'Key & Peele: Terrorist Interrogation',
                'description': 'md5:8678b246399b070816b12313e8b4eb5c',
                'uploader_id': 'atencio',
                'uploader': 'Peter Atencio',
                'upload_date': '20130927',
                'duration': 187,
            },
        },
        {
            'url': 'http://vimeo.com/76979871',
            'note': 'Video with subtitles',
            'info_dict': {
                'id': '76979871',
                'ext': 'mp4',
                'title': 'The New Vimeo Player (You Know, For Videos)',
                'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
                'upload_date': '20131015',
                'uploader_id': 'staff',
                'uploader': 'Vimeo Staff',
                'duration': 62,
            }
        },
        {
            # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/
            'url': 'https://player.vimeo.com/video/98044508',
            'note': 'The js code contains assignments to the same variable as the config',
            'info_dict': {
                'id': '98044508',
                'ext': 'mp4',
                'title': 'Pier Solar OUYA Official Trailer',
                'uploader': 'Tulio Gonçalves',
                'uploader_id': 'user28849593',
            },
        },
        {
            # contains original format
            'url': 'https://vimeo.com/33951933',
            'md5': '53c688fa95a55bf4b7293d37a89c5c53',
            'info_dict': {
                'id': '33951933',
                'ext': 'mp4',
                'title': 'FOX CLASSICS - Forever Classic ID - A Full Minute',
                'uploader': 'The DMCI',
                'uploader_id': 'dmci',
                'upload_date': '20111220',
                'description': 'md5:ae23671e82d05415868f7ad1aec21147',
            },
        },
        {
            'url': 'https://vimeo.com/109815029',
            'note': 'Video not completely processed, "failed" seed status',
            'only_matching': True,
        },
        {
            'url': 'https://vimeo.com/groups/travelhd/videos/22439234',
            'only_matching': True,
        },
    ]
b3d14cbf 212
b407e173
YCH
213 @staticmethod
214 def _extract_vimeo_url(url, webpage):
215 # Look for embedded (iframe) Vimeo player
216 mobj = re.search(
217 r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage)
218 if mobj:
219 player_url = unescapeHTML(mobj.group('url'))
220 surl = smuggle_url(player_url, {'Referer': url})
221 return surl
222 # Look for embedded (swf embed) Vimeo player
223 mobj = re.search(
224 r'<embed[^>]+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage)
225 if mobj:
226 return mobj.group(1)
227
b3d14cbf 228 def _verify_video_password(self, url, video_id, webpage):
c6c19746 229 password = self._downloader.params.get('videopassword', None)
b3d14cbf 230 if password is None:
93a16ba2 231 raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True)
7c845629 232 token, vuid = self._extract_xsrft_and_vuid(webpage)
852fad92 233 data = urlencode_postdata(encode_dict({
4f3e9430
JMF
234 'password': password,
235 'token': token,
852fad92 236 }))
9c85b537
JMF
237 if url.startswith('http://'):
238 # vimeo only supports https now, but the user can give an http url
239 url = url.replace('http://', 'https://')
67dda517 240 password_request = sanitized_Request(url + '/password', data)
b3d14cbf 241 password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
12bb392a 242 password_request.add_header('Referer', url)
9eab37dc 243 self._set_vimeo_cookie('vuid', vuid)
bf8f082a
PH
244 return self._download_webpage(
245 password_request, video_id,
246 'Verifying the password', 'Wrong password')
b3d14cbf 247
0eecc6a4
PH
248 def _verify_player_video_password(self, url, video_id):
249 password = self._downloader.params.get('videopassword', None)
250 if password is None:
251 raise ExtractorError('This video is protected by a password, use the --video-password option')
0a0110fc 252 data = urlencode_postdata(encode_dict({'password': password}))
0eecc6a4 253 pass_url = url + '/check-password'
67dda517 254 password_request = sanitized_Request(pass_url, data)
0eecc6a4
PH
255 password_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
256 return self._download_json(
257 password_request, video_id,
258 'Verifying the password',
259 'Wrong password')
260
fc79158d
JMF
    def _real_initialize(self):
        """Run the Vimeo login flow via _login() when the extractor is initialized."""
        self._login()
263
a0088bdf 264 def _real_extract(self, url):
9d4660ca
PH
265 url, data = unsmuggle_url(url)
266 headers = std_headers
267 if data is not None:
268 headers = headers.copy()
269 headers.update(data)
ba5d51b3
PH
270 if 'Referer' not in headers:
271 headers['Referer'] = url
9d4660ca 272
b3d14cbf
PH
273 # Extract ID from URL
274 mobj = re.match(self._VALID_URL, url)
b3d14cbf 275 video_id = mobj.group('id')
58ea7ec8 276 orig_url = url
9103bbc5 277 if mobj.group('pro') or mobj.group('player'):
61e00a97 278 url = 'https://player.vimeo.com/video/' + video_id
10831b5e 279 else:
280 url = 'https://vimeo.com/' + video_id
b3d14cbf
PH
281
282 # Retrieve video webpage to extract further information
67dda517 283 request = sanitized_Request(url, None, headers)
1060425c
PH
284 try:
285 webpage = self._download_webpage(request, video_id)
286 except ExtractorError as ee:
287 if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
288 errmsg = ee.cause.read()
289 if b'Because of its privacy settings, this video cannot be played here' in errmsg:
290 raise ExtractorError(
291 'Cannot download embed-only video without embedding '
292 'URL. Please call youtube-dl with the URL of the page '
293 'that embeds this video.',
294 expected=True)
295 raise
b3d14cbf
PH
296
297 # Now we begin extracting as much information as we can from what we
298 # retrieved. First we extract the information common to all extractors,
299 # and latter we extract those that are Vimeo specific.
300 self.report_extraction(video_id)
301
998e6cdb 302 vimeo_config = self._search_regex(
b6aa99af 303 r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
998e6cdb
S
304 'vimeo config', default=None)
305 if vimeo_config:
306 seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
307 if seed_status.get('state') == 'failed':
308 raise ExtractorError(
b6aa99af 309 '%s said: %s' % (self.IE_NAME, seed_status['title']),
998e6cdb
S
310 expected=True)
311
b3d14cbf
PH
312 # Extract the config JSON
313 try:
93b22c78
JMF
314 try:
315 config_url = self._html_search_regex(
41a7b00f
LL
316 r' data-config-url="(.+?)"', webpage,
317 'config URL', default=None)
318 if not config_url:
dd841752
S
319 # Sometimes new react-based page is served instead of old one that require
320 # different config URL extraction approach (see
321 # https://github.com/rg3/youtube-dl/pull/7209)
41a7b00f
LL
322 vimeo_clip_page_config = self._search_regex(
323 r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
324 'vimeo clip page config')
dd841752
S
325 config_url = self._parse_json(
326 vimeo_clip_page_config, video_id)['player']['config_url']
93b22c78
JMF
327 config_json = self._download_webpage(config_url, video_id)
328 config = json.loads(config_json)
329 except RegexNotFoundError:
330 # For pro videos or player.vimeo.com urls
48ad51b2
JMF
331 # We try to find out to which variable is assigned the config dic
332 m_variable_name = re.search('(\w)\.video\.id', webpage)
333 if m_variable_name is not None:
4698f0d8 334 config_re = r'%s=({[^}].+?});' % re.escape(m_variable_name.group(1))
48ad51b2
JMF
335 else:
336 config_re = [r' = {config:({.+?}),assets:', r'(?:[abc])=({.+?});']
9148eb00 337 config = self._search_regex(config_re, webpage, 'info section',
9e1a5b84 338 flags=re.DOTALL)
93b22c78 339 config = json.loads(config)
71907db3 340 except Exception as e:
b3d14cbf 341 if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage):
9148eb00 342 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option')
b3d14cbf 343
bf8f082a 344 if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
30965ac6
PH
345 if data and '_video_password_verified' in data:
346 raise ExtractorError('video password verification failed!')
b3d14cbf 347 self._verify_video_password(url, video_id, webpage)
30965ac6
PH
348 return self._real_extract(
349 smuggle_url(url, {'_video_password_verified': 'verified'}))
b3d14cbf 350 else:
9148eb00 351 raise ExtractorError('Unable to extract info section',
71907db3 352 cause=e)
559e370f
PH
353 else:
354 if config.get('view') == 4:
0eecc6a4 355 config = self._verify_player_video_password(url, video_id)
b3d14cbf
PH
356
357 # Extract title
358 video_title = config["video"]["title"]
359
360 # Extract uploader and uploader_id
361 video_uploader = config["video"]["owner"]["name"]
362 video_uploader_id = config["video"]["owner"]["url"].split('/')[-1] if config["video"]["owner"]["url"] else None
363
364 # Extract video thumbnail
aa32314d 365 video_thumbnail = config["video"].get("thumbnail")
c0e5d856
S
366 if video_thumbnail is None:
367 video_thumbs = config["video"].get("thumbs")
368 if video_thumbs and isinstance(video_thumbs, dict):
3e510af3 369 _, video_thumbnail = sorted((int(width if width.isdigit() else 0), t_url) for (width, t_url) in video_thumbs.items())[-1]
b3d14cbf
PH
370
371 # Extract video description
58ea7ec8 372
25930395 373 video_description = self._html_search_regex(
58ea7ec8
PH
374 r'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>',
375 webpage, 'description', default=None)
376 if not video_description:
377 video_description = self._html_search_meta(
378 'description', webpage, default=None)
379 if not video_description and mobj.group('pro'):
380 orig_webpage = self._download_webpage(
381 orig_url, video_id,
382 note='Downloading webpage for description',
383 fatal=False)
384 if orig_webpage:
385 video_description = self._html_search_meta(
386 'description', orig_webpage, default=None)
387 if not video_description and not mobj.group('player'):
388 self._downloader.report_warning('Cannot find video description')
b3d14cbf 389
69c8fb9e
S
390 # Extract video duration
391 video_duration = int_or_none(config["video"].get("duration"))
392
b3d14cbf
PH
393 # Extract upload date
394 video_upload_date = None
3c6f2450 395 mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
b3d14cbf 396 if mobj is not None:
3c6f2450 397 video_upload_date = unified_strdate(mobj.group(1))
b3d14cbf 398
4e761794 399 try:
9148eb00
PH
400 view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
401 like_count = int(self._search_regex(r'UserLikes:(\d+)', webpage, 'like count'))
402 comment_count = int(self._search_regex(r'UserComments:(\d+)', webpage, 'comment count'))
4e761794
JMF
403 except RegexNotFoundError:
404 # This info is only available in vimeo.com/{id} urls
405 view_count = None
406 like_count = None
407 comment_count = None
408
a6387bfd 409 formats = []
eb4f2740 410 download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
411 'X-Requested-With': 'XMLHttpRequest'})
412 download_data = self._download_json(download_request, video_id, fatal=False)
413 if download_data:
414 source_file = download_data.get('source_file')
415 if source_file and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
416 formats.append({
417 'url': source_file['download_url'],
418 'ext': source_file['extension'].lower(),
419 'width': int_or_none(source_file.get('width')),
420 'height': int_or_none(source_file.get('height')),
421 'filesize': parse_filesize(source_file.get('size')),
422 'format_id': source_file.get('public_name', 'Original'),
8534bf1f 423 'preference': 1,
eb4f2740 424 })
fcd817a3
S
425 config_files = config['video'].get('files') or config['request'].get('files', {})
426 for f in config_files.get('progressive', []):
427 video_url = f.get('url')
428 if not video_url:
429 continue
430 formats.append({
431 'url': video_url,
432 'format_id': 'http-%s' % f.get('quality'),
433 'width': int_or_none(f.get('width')),
434 'height': int_or_none(f.get('height')),
435 'fps': int_or_none(f.get('fps')),
436 'tbr': int_or_none(f.get('bitrate')),
437 })
438 m3u8_url = config_files.get('hls', {}).get('url')
35a3ff1d 439 if m3u8_url:
7e5edcfd
S
440 formats.extend(self._extract_m3u8_formats(
441 m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
fcd817a3
S
442 # Bitrates are completely broken. Single m3u8 may contain entries in kbps and bps
443 # at the same time without actual units specified. This lead to wrong sorting.
8534bf1f 444 self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'format_id'))
b3d14cbf 445
1eac553e
S
446 subtitles = {}
447 text_tracks = config['request'].get('text_tracks')
448 if text_tracks:
449 for tt in text_tracks:
65469a7f
JMF
450 subtitles[tt['lang']] = [{
451 'ext': 'vtt',
3946864c 452 'url': 'https://vimeo.com' + tt['url'],
65469a7f 453 }]
1eac553e 454
9103bbc5 455 return {
b0268cb6 456 'id': video_id,
b3d14cbf
PH
457 'uploader': video_uploader,
458 'uploader_id': video_uploader_id,
b0268cb6
S
459 'upload_date': video_upload_date,
460 'title': video_title,
461 'thumbnail': video_thumbnail,
462 'description': video_description,
69c8fb9e 463 'duration': video_duration,
a6387bfd 464 'formats': formats,
9103bbc5 465 'webpage_url': url,
4e761794
JMF
466 'view_count': view_count,
467 'like_count': like_count,
468 'comment_count': comment_count,
65469a7f 469 'subtitles': subtitles,
9103bbc5 470 }
caeefc29
JMF
471
472
class VimeoChannelIE(VimeoBaseInfoExtractor):
    """Playlist extractor for Vimeo channels; base for the other list extractors."""

    IE_NAME = 'vimeo:channel'
    _VALID_URL = r'https://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])'
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
    # A fixed title set by a subclass takes precedence over _TITLE_RE.
    _TITLE = None
    _TITLE_RE = r'<link rel="alternate"[^>]+?title="(.*?)"'
    _TESTS = [{
        'url': 'https://vimeo.com/channels/tributes',
        'info_dict': {
            'id': 'tributes',
            'title': 'Vimeo Tributes',
        },
        'playlist_mincount': 25,
    }]

    def _page_url(self, base_url, pagenum):
        """Return the URL of listing page number *pagenum*."""
        return '%s/videos/page:%d/' % (base_url, pagenum)

    def _extract_list_title(self, webpage):
        """Return the list title: _TITLE if set, else scraped via _TITLE_RE."""
        if self._TITLE:
            return self._TITLE
        return self._html_search_regex(self._TITLE_RE, webpage, 'list title')

    def _login_list_password(self, page_url, list_id, webpage):
        """Unlock a password-protected list.

        Returns *webpage* unchanged when it contains no password form;
        otherwise submits the 'videopassword' parameter and returns the page
        downloaded in response.
        """
        pw_form = self._search_regex(
            r'(?s)<form[^>]+?id="pw_form"(.*?)</form>',
            webpage, 'login form', default=None)
        if not pw_form:
            return webpage

        list_password = self._downloader.params.get('videopassword', None)
        if list_password is None:
            raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True)

        form_fields = self._hidden_inputs(pw_form)
        token, vuid = self._extract_xsrft_and_vuid(webpage)
        form_fields['token'] = token
        form_fields['password'] = list_password
        post_data = urlencode_postdata(encode_dict(form_fields))

        # The form action may be relative to the listing page.
        action_path = self._search_regex(
            r'action="([^"]+)"', pw_form, 'password URL')
        request = sanitized_Request(
            compat_urlparse.urljoin(page_url, action_path), post_data)
        request.add_header('Content-type', 'application/x-www-form-urlencoded')
        for cookie_name, cookie_value in (('vuid', vuid), ('xsrft', token)):
            self._set_vimeo_cookie(cookie_name, cookie_value)

        return self._download_webpage(
            request, list_id, 'Verifying the password', 'Wrong password')

    def _title_and_entries(self, list_id, base_url):
        """Yield the list title first, then one url_result per video found.

        Pages are fetched one after another until a page no longer matches
        _MORE_PAGES_INDICATOR.
        """
        pagenum = 0
        while True:
            pagenum += 1
            page_url = self._page_url(base_url, pagenum)
            webpage = self._download_webpage(
                page_url, list_id, 'Downloading page %s' % pagenum)

            if pagenum == 1:
                # Only the first page is checked for a password form, and it
                # is also the one that provides the title.
                webpage = self._login_list_password(page_url, list_id, webpage)
                yield self._extract_list_title(webpage)

            for clip_id in re.findall(r'id="clip_(\d+?)"', webpage):
                yield self.url_result('https://vimeo.com/%s' % clip_id, 'Vimeo')

            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                return

    def _extract_videos(self, list_id, base_url):
        """Build the playlist result for the list rooted at *base_url*."""
        stream = self._title_and_entries(list_id, base_url)
        # The generator's first item is the title; the rest are the entries.
        title = next(stream)
        return self.playlist_result(stream, list_id, title)

    def _real_extract(self, url):
        channel_id = re.match(self._VALID_URL, url).group('id')
        return self._extract_videos(channel_id, 'https://vimeo.com/channels/%s' % channel_id)
55a10eab
JMF
547
548
class VimeoUserIE(VimeoChannelIE):
    """Playlist extractor for the videos of a Vimeo user page."""

    IE_NAME = 'vimeo:user'
    _VALID_URL = r'https://vimeo\.com/(?!(?:[0-9]+|watchlater)(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)'
    _TITLE_RE = r'<a[^>]+?class="user">([^<>]+?)</a>'
    _TESTS = [{
        'url': 'https://vimeo.com/nkistudio/videos',
        'info_dict': {
            'title': 'Nki',
            'id': 'nkistudio',
        },
        'playlist_mincount': 66,
    }]

    def _real_extract(self, url):
        user_name = re.match(self._VALID_URL, url).group('name')
        return self._extract_videos(user_name, 'https://vimeo.com/%s' % user_name)
5cc14c2f
JMF
566
567
class VimeoAlbumIE(VimeoChannelIE):
    """Playlist extractor for Vimeo albums."""

    IE_NAME = 'vimeo:album'
    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)'
    _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
    _TESTS = [{
        'url': 'https://vimeo.com/album/2632481',
        'info_dict': {
            'id': '2632481',
            'title': 'Staff Favorites: November 2013',
        },
        'playlist_mincount': 13,
    }, {
        'note': 'Password-protected album',
        'url': 'https://vimeo.com/album/3253534',
        'info_dict': {
            'title': 'test',
            'id': '3253534',
        },
        'playlist_count': 1,
        'params': {
            'videopassword': 'youtube-dl',
        }
    }]

    def _page_url(self, base_url, pagenum):
        """Return the URL of album page *pagenum* (no '/videos' path segment)."""
        page_template = '%s/page:%d/'
        return page_template % (base_url, pagenum)

    def _real_extract(self, url):
        album = self._match_id(url)
        base_url = 'https://vimeo.com/album/%s' % album
        return self._extract_videos(album, base_url)
fb30ec22
JMF
598
599
class VimeoGroupsIE(VimeoAlbumIE):
    """Playlist extractor for Vimeo groups."""

    IE_NAME = 'vimeo:group'
    _VALID_URL = r'https://vimeo\.com/groups/(?P<name>[^/]+)(?:/(?!videos?/\d+)|$)'
    _TESTS = [{
        'url': 'https://vimeo.com/groups/rolexawards',
        'info_dict': {
            'id': 'rolexawards',
            'title': 'Rolex Awards for Enterprise',
        },
        'playlist_mincount': 73,
    }]

    def _extract_list_title(self, webpage):
        """Return the group title taken from the page's Open Graph title."""
        return self._og_search_title(webpage)

    def _real_extract(self, url):
        group_name = re.match(self._VALID_URL, url).group('name')
        base_url = 'https://vimeo.com/groups/%s' % group_name
        return self._extract_videos(group_name, base_url)
fcea44c6
PH
619
620
class VimeoReviewIE(InfoExtractor):
    """Extractor for Vimeo review pages; delegates to the 'Vimeo' extractor."""

    IE_NAME = 'vimeo:review'
    IE_DESC = 'Review pages on vimeo'
    _VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
        'md5': 'c507a72f780cacc12b2248bb4006d253',
        'info_dict': {
            'id': '75524534',
            'ext': 'mp4',
            'title': "DICK HARDWICK 'Comedian'",
            'uploader': 'Richard Hardwick',
        }
    }, {
        'note': 'video player needs Referer',
        'url': 'https://vimeo.com/user22258446/review/91613211/13f927e053',
        'md5': '6295fdab8f4bf6a002d058b2c6dce276',
        'info_dict': {
            'id': '91613211',
            'ext': 'mp4',
            'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn',
            'uploader': 'DevWeek Events',
            'duration': 2773,
            # Raw string: '\.' is not a valid string escape sequence, so the
            # non-raw form triggers an invalid-escape warning.  The runtime
            # value is unchanged.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }]

    def _real_extract(self, url):
        """Hand the review's video id over to the 'Vimeo' extractor via a player URL."""
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        player_url = 'https://player.vimeo.com/player/' + video_id
        return self.url_result(player_url, 'Vimeo', video_id)
efb7e119
JMF
653
654
class VimeoWatchLaterIE(VimeoChannelIE):
    """Playlist extractor for the logged-in user's "Watch Later" list."""

    IE_NAME = 'vimeo:watchlater'
    IE_DESC = 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)'
    _VALID_URL = r'https://vimeo\.com/(?:home/)?watchlater|:vimeowatchlater'
    _TITLE = 'Watch Later'
    _LOGIN_REQUIRED = True
    _TESTS = [{
        'url': 'https://vimeo.com/watchlater',
        'only_matching': True,
    }]

    def _real_initialize(self):
        self._login()

    def _page_url(self, base_url, pagenum):
        """Return a request object (not a plain URL) for listing page *pagenum*."""
        page_request = sanitized_Request('%s/page:%d/' % (base_url, pagenum))
        # Set the header to get a partial html page with the ids,
        # the normal page doesn't contain them.
        page_request.add_header('X-Requested-With', 'XMLHttpRequest')
        return page_request

    def _real_extract(self, url):
        list_id = 'watchlater'
        return self._extract_videos(list_id, 'https://vimeo.com/watchlater')
d6e6a422
PH
679
680
class VimeoLikesIE(InfoExtractor):
    """Playlist extractor for the videos a Vimeo user has liked."""

    _VALID_URL = r'https://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)'
    IE_NAME = 'vimeo:likes'
    IE_DESC = 'Vimeo user likes'
    _TEST = {
        'url': 'https://vimeo.com/user755559/likes/',
        'playlist_mincount': 293,
        "info_dict": {
            'id': 'user755559_likes',
            "description": "See all the videos urza likes",
            "title": 'Videos urza likes',
        },
    }

    def _real_extract(self, url):
        """Return a paged playlist of the user's liked videos."""
        user_id = self._match_id(url)
        first_page = self._download_webpage(url, user_id)

        # The page count is read from the pagination link that precedes the
        # "next" button.
        raw_page_count = self._search_regex(
            r'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)">
                .*?</a></li>\s*<li\s+class="pagination_next">
            ''', first_page, 'page count')
        page_count = self._int(raw_page_count, 'page count', fatal=True)
        PAGE_SIZE = 12
        playlist_title = self._html_search_regex(
            r'(?s)<h1>(.+?)</h1>', first_page, 'title', fatal=False)
        playlist_description = self._html_search_meta('description', first_page)

        def _get_page(idx):
            # idx is zero-based while Vimeo page numbers start at 1.
            page_number = idx + 1
            page_url = 'https://vimeo.com/user%s/likes/page:%d/sort:date' % (
                user_id, page_number)
            page = self._download_webpage(
                page_url, user_id,
                note='Downloading page %d/%d' % (page_number, page_count))
            video_list = self._search_regex(
                r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>',
                page, 'video content')
            for path in re.findall(
                    r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list):
                yield {
                    '_type': 'url',
                    'url': compat_urlparse.urljoin(page_url, path),
                }

        entries = InAdvancePagedList(_get_page, page_count, PAGE_SIZE)

        return {
            '_type': 'playlist',
            'id': 'user%s_likes' % user_id,
            'title': playlist_title,
            'description': playlist_description,
            'entries': entries,
        }