]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/teamtreehouse.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
21 class TeamTreeHouseIE(InfoExtractor
):
22 _VALID_URL
= r
'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
25 'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
27 'id': 'introduction-to-user-authentication-in-php',
28 'title': 'Introduction to User Authentication in PHP',
29 'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
31 'playlist_mincount': 24,
34 'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
36 'id': 'deploying-a-react-app',
37 'title': 'Deploying a React App',
38 'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
40 'playlist_mincount': 4,
43 'url': 'https://teamtreehouse.com/library/application-overview-2',
45 'id': 'application-overview-2',
47 'title': 'Application Overview',
48 'description': 'md5:4b0a234385c27140a4378de5f1e15127',
50 'expected_warnings': ['This is just a preview'],
52 _NETRC_MACHINE
= 'teamtreehouse'
54 def _perform_login(self
, username
, password
):
56 signin_page
= self
._download
_webpage
(
57 'https://teamtreehouse.com/signin',
58 None, 'Downloading signin page')
59 data
= self
._form
_hidden
_inputs
('new_user_session', signin_page
)
61 'user_session[email]': username
,
62 'user_session[password]': password
,
64 error_message
= get_element_by_class('error-message', self
._download
_webpage
(
65 'https://teamtreehouse.com/person_session',
66 None, 'Logging in', data
=urlencode_postdata(data
)))
68 raise ExtractorError(clean_html(error_message
), expected
=True)
70 def _real_extract(self
, url
):
71 display_id
= self
._match
_id
(url
)
72 webpage
= self
._download
_webpage
(url
, display_id
)
73 title
= self
._html
_search
_meta
(['og:title', 'twitter:title'], webpage
)
74 description
= self
._html
_search
_meta
(
75 ['description', 'og:description', 'twitter:description'], webpage
)
76 entries
= self
._parse
_html
5_media
_entries
(url
, webpage
, display_id
)
80 for subtitles
in info
.get('subtitles', {}).values():
81 for subtitle
in subtitles
:
82 subtitle
['ext'] = determine_ext(subtitle
['url'], 'srt')
84 is_preview
= 'data-preview="true"' in webpage
87 'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id
)
90 duration
= float_or_none(self
._search
_regex
(
91 r
'data-duration="(\d+)"', webpage
, 'duration'), 1000)
93 duration
= parse_duration(get_element_by_id(
94 'video-duration', webpage
))
99 'description': description
,
100 'duration': duration
,
104 def extract_urls(html
, extract_info
=None):
105 for path
in re
.findall(r
'<a[^>]+href="([^"]+)"', html
):
106 page_url
= urljoin(url
, path
)
108 '_type': 'url_transparent',
109 'id': self
._match
_id
(page_url
),
111 'id_key': self
.ie_key(),
114 entry
.update(extract_info
)
115 entries
.append(entry
)
117 workshop_videos
= self
._search
_regex
(
118 r
'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
119 webpage
, 'workshop videos', default
=None)
121 extract_urls(workshop_videos
)
123 stages_path
= self
._search
_regex
(
124 r
'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
125 webpage
, 'stages path')
127 stages_page
= self
._download
_webpage
(
128 urljoin(url
, stages_path
), display_id
, 'Downloading stages page')
129 for chapter_number
, (chapter
, steps_list
) in enumerate(re
.findall(r
'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>', stages_page
), 1):
130 extract_urls(steps_list
, {
132 'chapter_number': chapter_number
,
134 title
= remove_end(title
, ' Course')
136 return self
.playlist_result(
137 entries
, display_id
, title
, description
)