]> jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/lynda.py
Merge remote-tracking branch 'rzhxeo/blip2'
[yt-dlp.git] / youtube_dl / extractor / lynda.py
1 from __future__ import unicode_literals
2
3 import re
4 import json
5
6 from .common import InfoExtractor
7 from ..utils import ExtractorError
8
9
class LyndaIE(InfoExtractor):
    """Information extractor for a single lynda.com video page."""

    IE_NAME = 'lynda'
    IE_DESC = 'lynda.com videos'
    # Group 1 captures the numeric video id (the number before "-N.html").
    _VALID_URL = r'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html'

    _TEST = {
        'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
        'file': '114408.mp4',
        'md5': 'ecfc6862da89489161fb9cd5f5a6fac1',
        # No ``u`` prefix: the module already enables unicode_literals, and
        # the prefix is a syntax error on Python 3.0-3.2.
        'info_dict': {
            'title': 'Using the exercise files',
            'duration': 68
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        The video id is taken from the URL and used to query the
        ``ajax/player`` endpoint, whose JSON answer supplies the title,
        duration and the list of downloadable formats.

        Raises:
            ExtractorError: (expected) if the endpoint reports the video as
                ``NotFound`` or if ``HasAccess`` is explicitly ``False``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)

        page = self._download_webpage(
            'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id,
            video_id, 'Downloading video JSON')
        video_json = json.loads(page)

        if video_json.get('Status') == 'NotFound':
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        # Only an explicit False blocks extraction; a missing flag is treated
        # as "accessible" rather than failing with a KeyError.
        if video_json.get('HasAccess') is False:
            raise ExtractorError('Video %s is only available for members' % video_id, expected=True)

        # NOTE(review): the API may deliver 'ID' and 'Resolution' as JSON
        # numbers. Both are used as textual identifiers downstream, so they
        # are coerced to text here; values that are already strings are
        # left unchanged by the formatting.
        video_id = '%s' % video_json['ID']
        title = video_json['Title']
        duration = video_json.get('DurationInSeconds')

        formats = [{
            'url': fmt['Url'],
            'ext': fmt['Extension'],
            # Purely descriptive fields: tolerate their absence.
            'width': fmt.get('Width'),
            'height': fmt.get('Height'),
            'filesize': fmt.get('FileSize'),
            'format_id': '%s' % fmt['Resolution'],
        } for fmt in video_json['Formats']]

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'duration': duration,
            'formats': formats
        }
59
60
class LyndaCourseIE(InfoExtractor):
    """Playlist extractor that expands a lynda.com course into its videos."""

    IE_NAME = 'lynda:course'
    IE_DESC = 'lynda.com online courses'

    # A course URL is identical to the URL of that course's
    # welcome/introduction video, so such a link is treated as a course link.
    _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'

    def _real_extract(self, url):
        """Return a playlist result listing the accessible videos of a course.

        The course id and course path are parsed from *url*; the course JSON
        is fetched from the ``ajax/player`` endpoint. Videos whose
        ``HasAccess`` flag is not ``True`` are left out, and a warning with
        their count is reported through the downloader.

        Raises:
            ExtractorError: (expected) if the endpoint answers with a
                ``NotFound`` status.
        """
        course_path, course_id = re.match(self._VALID_URL, url).group(
            'coursepath', 'courseid')

        api_url = 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id
        course_json = json.loads(
            self._download_webpage(api_url, course_id, 'Downloading course JSON'))

        if 'Status' in course_json:
            if course_json['Status'] == 'NotFound':
                raise ExtractorError(
                    'Course %s does not exist' % course_id, expected=True)

        # Walk every chapter, keeping the ids of videos we may download and
        # counting the ones we may not.
        accessible_ids = []
        locked_count = 0
        for chapter in course_json['Chapters']:
            for video in chapter['Videos']:
                if video['HasAccess'] is True:
                    accessible_ids.append(video['ID'])
                else:
                    locked_count += 1

        if locked_count > 0:
            self._downloader.report_warning(
                '%s videos are only available for members and will not be downloaded' % locked_count)

        # Each entry is a video page URL handed to the 'Lynda' extractor.
        video_url_template = 'http://www.lynda.com/%s/%s-4.html'
        entries = []
        for video_id in accessible_ids:
            entries.append(self.url_result(
                video_url_template % (course_path, video_id), 'Lynda'))

        playlist_title = course_json['Title']

        return self.playlist_result(entries, course_id, playlist_title)