]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/raywenderlich.py
3 from .common
import InfoExtractor
4 from .vimeo
import VimeoIE
5 from ..compat
import compat_str
17 class RayWenderlichIE(InfoExtractor
):
21 videos\.raywenderlich\.com/courses|
22 (?:www\.)?raywenderlich\.com
24 (?P<course_id>[^/]+)/lessons/(?P<id>\d+)
28 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1',
32 'title': 'Introduction',
33 'description': 'md5:804d031b3efa9fcb49777d512d74f722',
34 'timestamp': 1513906277,
35 'upload_date': '20171222',
37 'uploader': 'Ray Wenderlich',
38 'uploader_id': 'user3304672',
42 'skip_download': True,
44 'add_ie': [VimeoIE
.ie_key()],
45 'expected_warnings': ['HTTP Error 403: Forbidden'],
47 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1',
48 'only_matching': True,
52 def _extract_video_id(data
, lesson_id
):
55 groups
= try_get(data
, lambda x
: x
['groups'], list) or []
59 if not isinstance(group
, dict):
61 contents
= try_get(data
, lambda x
: x
['contents'], list) or []
62 for content
in contents
:
63 if not isinstance(content
, dict):
65 ordinal
= int_or_none(content
.get('ordinal'))
66 if ordinal
!= lesson_id
:
68 video_id
= content
.get('identifier')
70 return compat_str(video_id
)
72 def _real_extract(self
, url
):
73 mobj
= self
._match
_valid
_url
(url
)
74 course_id
, lesson_id
= mobj
.group('course_id', 'id')
75 display_id
= '%s/%s' % (course_id
, lesson_id
)
77 webpage
= self
._download
_webpage
(url
, display_id
)
79 thumbnail
= self
._og
_search
_thumbnail
(
80 webpage
, default
=None) or self
._html
_search
_meta
(
81 'twitter:image', webpage
, 'thumbnail')
83 if '>Subscribe to unlock' in webpage
:
85 'This content is only available for subscribers',
89 'thumbnail': thumbnail
,
92 vimeo_id
= self
._search
_regex
(
93 r
'data-vimeo-id=["\'](\d
+)', webpage, 'vimeo
id', default=None)
96 data = self._parse_json(
98 r'data
-collection
=(["\'])(?P<data>{.+?})\1', webpage,
99 'data collection', default='{}', group='data'),
100 display_id, transform_source=unescapeHTML, fatal=False)
101 video_id = self._extract_video_id(
102 data, lesson_id) or self._search_regex(
103 r'/videos/(\d+)/', thumbnail, 'video id')
106 'X-Requested-With': 'XMLHttpRequest',
108 csrf_token = self._html_search_meta(
109 'csrf-token', webpage, 'csrf token', default=None)
111 headers['X-CSRF-Token'] = csrf_token
112 video = self._download_json(
113 'https://videos.raywenderlich.com/api/v1/videos/%s.json'
114 % video_id, display_id, headers=headers)['video']
115 vimeo_id = video['clips'][0]['provider_id']
117 '_type': 'url_transparent',
118 'title': video.get('name'),
119 'description': video.get('description') or video.get(
121 'duration': int_or_none(video.get('duration')),
122 'timestamp': unified_timestamp(video.get('created_at')),
125 return merge_dicts(info, self.url_result(
126 VimeoIE._smuggle_referrer(
127 'https://player.vimeo.com/video/%s' % vimeo_id, url),
128 ie=VimeoIE.ie_key(), video_id=vimeo_id))
131 class RayWenderlichCourseIE(InfoExtractor):
132 _VALID_URL = r'''(?x)
135 videos\.raywenderlich\.com/courses|
136 (?:www\.)?raywenderlich\.com
142 'url': 'https://www.raywenderlich.com/3530-testing-in-ios',
144 'title': 'Testing in iOS',
145 'id': '3530-testing-in-ios',
150 'playlist_count': 29,
154 def suitable(cls, url):
155 return False if RayWenderlichIE.suitable(url) else super(
156 RayWenderlichCourseIE, cls).suitable(url)
158 def _real_extract(self, url):
159 course_id = self._match_id(url)
161 webpage = self._download_webpage(url, course_id)
165 for lesson_url in re.findall(
166 r'<a[^>]+\bhref=["\'](/%s/lessons
/\d
+)' % course_id, webpage):
167 if lesson_url in lesson_urls:
169 lesson_urls.add(lesson_url)
170 entries.append(self.url_result(
171 urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key()))
173 title = self._og_search_title(
174 webpage, default=None) or self._html_search_meta(
175 'twitter
:title
', webpage, 'title
', default=None)
177 return self.playlist_result(entries, course_id, title)