]>
Commit | Line | Data |
---|---|---|
5d49d879 S |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from .vimeo import VimeoIE | |
24e0cd70 | 5 | from ..compat import compat_str |
5d49d879 | 6 | from ..utils import ( |
5d49d879 | 7 | ExtractorError, |
24e0cd70 S |
8 | int_or_none, |
9 | merge_dicts, | |
10 | try_get, | |
11 | unescapeHTML, | |
12 | unified_timestamp, | |
5d49d879 S |
13 | urljoin, |
14 | ) | |
15 | ||
16 | ||
class RayWenderlichIE(InfoExtractor):
    """Extractor for a single raywenderlich.com lesson page.

    The lesson is resolved to a Vimeo video id and the actual extraction
    is delegated to VimeoIE.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
        'info_dict': {
            'id': '248377018',
            'ext': 'mp4',
            'title': 'Introduction',
            'description': 'md5:804d031b3efa9fcb49777d512d74f722',
            'timestamp': 1513906277,
            'upload_date': '20171222',
            'duration': 133,
            'uploader': 'Ray Wenderlich',
            'uploader_id': 'user3304672',
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
        'add_ie': [VimeoIE.ie_key()],
        'expected_warnings': ['HTTP Error 403: Forbidden'],
    }, {
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_video_id(data, lesson_id):
        """Return the video identifier of a lesson, or None if not found.

        data is the parsed JSON object taken from the page's
        data-collection attribute. It is searched for a 'groups' list of
        dicts, each holding a 'contents' list of dicts with 'ordinal' and
        'identifier' keys. lesson_id is the lesson number from the URL
        and may be a string or an int.
        """
        if not data:
            return
        # The URL supplies the lesson number as a string, whereas ordinals
        # are normalised with int_or_none below; compare both as integers
        # so that a match is actually possible.
        lesson_ordinal = int_or_none(lesson_id)
        if lesson_ordinal is None:
            return
        groups = try_get(data, lambda x: x['groups'], list) or []
        for group in groups:
            if not isinstance(group, dict):
                continue
            # Read the lessons of this particular group (the top-level
            # object was consulted here before, which made the loop over
            # groups pointless).
            # NOTE(review): assumes ordinals are unique across groups -
            # confirm against a real data-collection payload.
            contents = try_get(group, lambda x: x['contents'], list) or []
            for content in contents:
                if not isinstance(content, dict):
                    continue
                if int_or_none(content.get('ordinal')) != lesson_ordinal:
                    continue
                video_id = content.get('identifier')
                if video_id:
                    return compat_str(video_id)

    def _real_extract(self, url):
        """Extract a lesson and return a result delegating to VimeoIE.

        Raises ExtractorError (expected) when the page carries the
        'Subscribe to unlock' marker.
        """
        mobj = self._match_valid_url(url)
        course_id, lesson_id = mobj.group('course_id', 'id')
        display_id = '%s/%s' % (course_id, lesson_id)

        webpage = self._download_webpage(url, display_id)

        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image', webpage, 'thumbnail')

        if '>Subscribe to unlock' in webpage:
            raise ExtractorError(
                'This content is only available for subscribers',
                expected=True)

        info = {
            'thumbnail': thumbnail,
        }

        # Preferred path: the Vimeo id is embedded directly in the page.
        vimeo_id = self._search_regex(
            r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)

        if not vimeo_id:
            # Otherwise determine the site's own video id (from the
            # data-collection JSON or, failing that, the thumbnail URL)
            # and query the videos API for the Vimeo clip.
            data = self._parse_json(
                self._search_regex(
                    r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
                    'data collection', default='{}', group='data'),
                display_id, transform_source=unescapeHTML, fatal=False)
            # thumbnail may be None when neither meta tag is present;
            # search an empty string instead so that the failure is
            # reported as a regular extraction error, not a TypeError.
            video_id = self._extract_video_id(
                data, lesson_id) or self._search_regex(
                r'/videos/(\d+)/', thumbnail or '', 'video id')
            headers = {
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
            }
            csrf_token = self._html_search_meta(
                'csrf-token', webpage, 'csrf token', default=None)
            if csrf_token:
                headers['X-CSRF-Token'] = csrf_token
            video = self._download_json(
                'https://videos.raywenderlich.com/api/v1/videos/%s.json'
                % video_id, display_id, headers=headers)['video']
            vimeo_id = video['clips'][0]['provider_id']
            info.update({
                '_type': 'url_transparent',
                'title': video.get('name'),
                'description': video.get('description') or video.get(
                    'meta_description'),
                'duration': int_or_none(video.get('duration')),
                'timestamp': unified_timestamp(video.get('created_at')),
            })

        # Values already present in info take part in the merge together
        # with the url_result pointing at the Vimeo player.
        return merge_dicts(info, self.url_result(
            VimeoIE._smuggle_referrer(
                'https://player.vimeo.com/video/%s' % vimeo_id, url),
            ie=VimeoIE.ie_key(), video_id=vimeo_id))
129 | ||
130 | ||
class RayWenderlichCourseIE(InfoExtractor):
    """Playlist extractor for a raywenderlich.com course page.

    Every distinct lesson link found on the course page becomes one
    playlist entry handled by RayWenderlichIE.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<id>[^/]+)
                    '''

    _TEST = {
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
        'info_dict': {
            'title': 'Testing in iOS',
            'id': '3530-testing-in-ios',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 29,
    }

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle url.

        URLs accepted by RayWenderlichIE are rejected here so that lesson
        pages are not treated as courses.
        """
        if RayWenderlichIE.suitable(url):
            return False
        return super(RayWenderlichCourseIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist from the lesson links on the course page."""
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        # course_id comes straight from the URL, so escape it before
        # embedding it in a pattern; otherwise characters such as '.' or
        # '+' would be interpreted as regex syntax.
        lesson_re = (
            r'<a[^>]+\bhref=["\'](/%s/lessons/\d+)' % re.escape(course_id))

        entries = []
        # Tracks links already emitted so each lesson appears only once
        # while the page order of first occurrences is kept.
        lesson_urls = set()
        for lesson_url in re.findall(lesson_re, webpage):
            if lesson_url in lesson_urls:
                continue
            lesson_urls.add(lesson_url)
            entries.append(self.url_result(
                urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))

        title = self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'twitter:title', webpage, 'title', default=None)

        return self.playlist_result(entries, course_id, title)