]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/thisoldhouse.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / thisoldhouse.py
CommitLineData
c76c9667 1import json
2
0df63cce 3from .brightcove import BrightcoveNewIE
c1084ddb 4from .common import InfoExtractor
c76c9667 5from .zype import ZypeIE
3d2623a8 6from ..networking import HEADRequest
c76c9667 7from ..networking.exceptions import HTTPError
8from ..utils import (
9 ExtractorError,
10 filter_dict,
11 parse_qs,
0df63cce 12 smuggle_url,
c76c9667 13 try_call,
14 urlencode_postdata,
15)
c1084ddb
RA
16
17
class ThisOldHouseIE(InfoExtractor):
    """Extractor for video pages on thisoldhouse.com.

    The page itself hosts no media; it embeds an iframe that points either
    at a Zype player or at a Brightcove player. This extractor locates that
    iframe, resolves it to the final player URL where needed, and hands the
    result over to ``ZypeIE`` or ``BrightcoveNewIE``.
    """

    _NETRC_MACHINE = 'thisoldhouse'
    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
    _TESTS = [{
        # Unresolved Brightcove URL embed (formerly Zype), free
        'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
        'info_dict': {
            'id': '6325298523112',
            'ext': 'mp4',
            'title': 'How to Build a Storage Bench',
            'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
            'timestamp': 1681793639,
            'upload_date': '20230418',
            'duration': 674.54,
            'tags': 'count:11',
            'uploader_id': '6314471934001',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Brightcove embed, authwalled
        'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
        'info_dict': {
            'id': '6349675446112',
            'ext': 'mp4',
            'title': 'E17 | Glen Ridge Generational | Multi-Generational',
            'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
            'timestamp': 1711382202,
            'upload_date': '20240325',
            'duration': 1422.229,
            'tags': 'count:13',
            'uploader_id': '6314471934001',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'expected_warnings': ['Login with password is not supported for this website'],
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires subscription',
    }, {
        # Page no longer has video
        'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
        'only_matching': True,
    }, {
        # 404 Not Found
        'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
        'only_matching': True,
    }, {
        # 404 Not Found
        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
        'only_matching': True,
    }, {
        'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
        'only_matching': True,
    }, {
        # iframe www.thisoldhouse.com
        'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
        'only_matching': True,
    }]

    _LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'

    def _perform_login(self, username, password):
        """Log in with the given credentials.

        The flow is: request the insider page (for session cookies), follow
        the wp-login redirect to learn the login parameters, POST the
        credentials as JSON to ``_LOGIN_URL``, then submit the hidden form
        returned by that request to the login callback.

        Raises:
            ExtractorError: with an "Invalid username or password" message
                when the credential POST is answered with HTTP 401; any
                other extraction error is re-raised unchanged.
        """
        self._request_webpage(
            HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
        urlh = self._request_webpage(
            'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
            errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})

        # The query string of the URL we were redirected to carries the login
        # parameters; they are forwarded as-is, except that "client" is sent
        # under the key "client_id".
        login_params = {
            ('client_id' if k == 'client' else k): v[0]
            for k, v in parse_qs(urlh.url).items()
        }
        # filter_dict drops entries whose value is None, so "_csrf" is only
        # sent when the cookie is actually present.
        payload = json.dumps(filter_dict({
            **login_params,
            'tenant': 'thisoldhouse',
            'username': username,
            'password': password,
            'popup_options': {},
            'sso': True,
            '_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
            '_intstate': 'deprecated',
        }), separators=(',', ':')).encode()

        try:
            auth_form = self._download_webpage(
                self._LOGIN_URL, None, 'Submitting credentials', headers={
                    'Content-Type': 'application/json',
                    'Referer': urlh.url,
                }, data=payload)
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise ExtractorError('Invalid username or password', expected=True)
            raise

        # The response is an HTML form; posting its hidden inputs to the
        # callback endpoint finishes the login.
        self._request_webpage(
            'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
            data=urlencode_postdata(self._hidden_inputs(auth_form)))

    def _real_extract(self, url):
        """Find the embedded player on the page and delegate to its extractor.

        A Zype iframe is tried first; if none is present, a Brightcove iframe
        is required (its absence is an extraction error). Pages showing the
        subscriber-only notice trigger a login-required error instead.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        if 'To Unlock This content' in webpage:
            self.raise_login_required(
                'This video is only available for subscribers. '
                'Note that --cookies-from-browser may not work due to this site using session cookies')

        video_url, video_id = self._search_regex(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
            webpage, 'zype url', group=(1, 2), default=(None, None))
        if video_url:
            # The site-hosted Zype URL redirects; a HEAD request yields the final URL
            video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
            return self.url_result(video_url, ZypeIE, video_id)

        # "www." is optional in the site-hosted pattern, consistent with
        # _VALID_URL and the Zype pattern above.
        video_url, video_id = self._search_regex([
            r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.com/videos/brightcove/(\d+))'],
            webpage, 'iframe url', group=(1, 2))
        if not parse_qs(video_url).get('videoId'):
            # No videoId query parameter means this is the site-hosted URL,
            # which has to be resolved to the actual player URL first.
            video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
        return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)