]>
Commit | Line | Data |
---|---|---|
5d49d879 S |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
4 | from .vimeo import VimeoIE | |
5 | from ..utils import ( | |
5d49d879 | 6 | ExtractorError, |
24e0cd70 S |
7 | int_or_none, |
8 | merge_dicts, | |
9 | try_get, | |
10 | unescapeHTML, | |
11 | unified_timestamp, | |
5d49d879 S |
12 | urljoin, |
13 | ) | |
14 | ||
15 | ||
class RayWenderlichIE(InfoExtractor):
    """Extract a single lesson video from a raywenderlich.com course page.

    The lesson page either embeds a Vimeo id directly (``data-vimeo-id``) or
    exposes a ``data-collection`` JSON blob from which the site's own video id
    is resolved and then looked up through the videos JSON API. In both cases
    the final result is delegated to the Vimeo extractor.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
                    '''

    _TESTS = [{
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
        'info_dict': {
            'id': '248377018',
            'ext': 'mp4',
            'title': 'Introduction',
            'description': 'md5:804d031b3efa9fcb49777d512d74f722',
            'timestamp': 1513906277,
            'upload_date': '20171222',
            'duration': 133,
            'uploader': 'Ray Wenderlich',
            'uploader_id': 'user3304672',
        },
        'params': {
            'noplaylist': True,
            'skip_download': True,
        },
        'add_ie': [VimeoIE.ie_key()],
        'expected_warnings': ['HTTP Error 403: Forbidden'],
    }, {
        'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_video_id(data, lesson_id):
        """Return the video identifier for a lesson, or None if not found.

        data: parsed ``data-collection`` JSON; expected to hold a ``groups``
            list whose dict items each carry a ``contents`` list.
        lesson_id: lesson ordinal, as a numeric string (the URL capture) or
            an int.

        Returns the matching content's ``identifier`` as a string, or None
        when the data is empty, malformed, or has no entry for the ordinal.
        """
        if not data:
            return None
        # The URL capture is a string while ordinals are parsed to int, so
        # normalise the wanted ordinal once before comparing.
        wanted_ordinal = int_or_none(lesson_id)
        if wanted_ordinal is None:
            return None
        for group in try_get(data, lambda x: x['groups'], list) or []:
            if not isinstance(group, dict):
                continue
            # Contents are read from the group being iterated, not from the
            # top-level collection.
            for content in try_get(group, lambda x: x['contents'], list) or []:
                if not isinstance(content, dict):
                    continue
                if int_or_none(content.get('ordinal')) != wanted_ordinal:
                    continue
                video_id = content.get('identifier')
                if video_id:
                    return str(video_id)
        return None

    def _real_extract(self, url):
        """Resolve the lesson at *url* to a Vimeo-backed result.

        Raises ExtractorError (expected) when the page shows the
        subscribers-only marker.
        """
        mobj = self._match_valid_url(url)
        course_id, lesson_id = mobj.group('course_id', 'id')
        display_id = f'{course_id}/{lesson_id}'

        webpage = self._download_webpage(url, display_id)

        # Prefer the Open Graph thumbnail, fall back to the Twitter card one.
        thumbnail = self._og_search_thumbnail(
            webpage, default=None) or self._html_search_meta(
            'twitter:image', webpage, 'thumbnail')

        if '>Subscribe to unlock' in webpage:
            raise ExtractorError(
                'This content is only available for subscribers',
                expected=True)

        info = {
            'thumbnail': thumbnail,
        }

        vimeo_id = self._search_regex(
            r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None)

        if not vimeo_id:
            # No direct Vimeo id on the page: resolve the site's video id
            # (from the collection data, else from the thumbnail URL) and ask
            # the videos API for the clip's provider id.
            data = self._parse_json(
                self._search_regex(
                    r'data-collection=(["\'])(?P<data>{.+?})\1', webpage,
                    'data collection', default='{}', group='data'),
                display_id, transform_source=unescapeHTML, fatal=False)
            video_id = self._extract_video_id(
                data, lesson_id) or self._search_regex(
                r'/videos/(\d+)/', thumbnail, 'video id')
            headers = {
                'Referer': url,
                'X-Requested-With': 'XMLHttpRequest',
            }
            csrf_token = self._html_search_meta(
                'csrf-token', webpage, 'csrf token', default=None)
            if csrf_token:
                headers['X-CSRF-Token'] = csrf_token
            video = self._download_json(
                f'https://videos.raywenderlich.com/api/v1/videos/{video_id}.json',
                display_id, headers=headers)['video']
            vimeo_id = video['clips'][0]['provider_id']
            info.update({
                '_type': 'url_transparent',
                'title': video.get('name'),
                'description': video.get('description') or video.get(
                    'meta_description'),
                'duration': int_or_none(video.get('duration')),
                'timestamp': unified_timestamp(video.get('created_at')),
            })

        return merge_dicts(info, self.url_result(
            VimeoIE._smuggle_referrer(
                f'https://player.vimeo.com/video/{vimeo_id}', url),
            ie=VimeoIE.ie_key(), video_id=vimeo_id))
128 | ||
129 | ||
class RayWenderlichCourseIE(InfoExtractor):
    """Extract a raywenderlich.com course page as a playlist of its lessons.

    Lesson URLs are handled by RayWenderlichIE; this extractor only claims
    URLs that the lesson extractor does not match.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            videos\.raywenderlich\.com/courses|
                            (?:www\.)?raywenderlich\.com
                        )/
                        (?P<id>[^/]+)
                    '''

    _TEST = {
        'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
        'info_dict': {
            'title': 'Testing in iOS',
            'id': '3530-testing-in-ios',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 29,
    }

    @classmethod
    def suitable(cls, url):
        """Return False for lesson URLs so RayWenderlichIE handles them."""
        return False if RayWenderlichIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        """Collect the lesson links found on the course page into a playlist."""
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        # Escape the id: it comes from the URL and is matched literally, so
        # regex metacharacters in it must not be interpreted.
        lesson_paths = re.findall(
            rf'<a[^>]+\bhref=["\'](/{re.escape(course_id)}/lessons/\d+)',
            webpage)

        # dict.fromkeys drops repeated links while keeping first-seen order.
        entries = [
            self.url_result(
                urljoin(url, lesson_path), ie=RayWenderlichIE.ie_key())
            for lesson_path in dict.fromkeys(lesson_paths)
        ]

        title = self._og_search_title(
            webpage, default=None) or self._html_search_meta(
            'twitter:title', webpage, 'title', default=None)

        return self.playlist_result(entries, course_id, title)