]>
Commit | Line | Data |
---|---|---|
c76c9667 | 1 | import json |
2 | ||
0df63cce | 3 | from .brightcove import BrightcoveNewIE |
c1084ddb | 4 | from .common import InfoExtractor |
c76c9667 | 5 | from .zype import ZypeIE |
3d2623a8 | 6 | from ..networking import HEADRequest |
c76c9667 | 7 | from ..networking.exceptions import HTTPError |
8 | from ..utils import ( | |
9 | ExtractorError, | |
10 | filter_dict, | |
11 | parse_qs, | |
0df63cce | 12 | smuggle_url, |
c76c9667 | 13 | try_call, |
14 | urlencode_postdata, | |
15 | ) | |
c1084ddb RA |
16 | |
17 | ||
class ThisOldHouseIE(InfoExtractor):
    """Extractor for video pages on thisoldhouse.com.

    The page itself hosts no media: it embeds either a Zype or a Brightcove
    player in an ``<iframe>``.  This extractor locates that iframe, resolves
    the embed URL where needed and delegates to ``ZypeIE`` or
    ``BrightcoveNewIE``.
    """

    _NETRC_MACHINE = 'thisoldhouse'
    _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
    _TESTS = [{
        # Unresolved Brightcove URL embed (formerly Zype), free
        'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
        'info_dict': {
            'id': '6325298523112',
            'ext': 'mp4',
            'title': 'How to Build a Storage Bench',
            'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
            'timestamp': 1681793639,
            'upload_date': '20230418',
            'duration': 674.54,
            'tags': 'count:11',
            'uploader_id': '6314471934001',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Brightcove embed, authwalled
        'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
        'info_dict': {
            'id': '6349675446112',
            'ext': 'mp4',
            'title': 'E17 | Glen Ridge Generational | Multi-Generational',
            'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
            'timestamp': 1711382202,
            'upload_date': '20240325',
            'duration': 1422.229,
            'tags': 'count:13',
            'uploader_id': '6314471934001',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'expected_warnings': ['Login with password is not supported for this website'],
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires subscription',
    }, {
        # Page no longer has video
        'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
        'only_matching': True,
    }, {
        # 404 Not Found
        'url': 'https://www.thisoldhouse.com/tv-episode/ask-toh-shelf-rough-electric',
        'only_matching': True,
    }, {
        # 404 Not Found
        'url': 'https://www.thisoldhouse.com/how-to/how-to-build-storage-bench',
        'only_matching': True,
    }, {
        'url': 'https://www.thisoldhouse.com/21113884/s41-e13-paradise-lost',
        'only_matching': True,
    }, {
        # iframe www.thisoldhouse.com
        'url': 'https://www.thisoldhouse.com/21083431/seaside-transformation-the-westerly-project',
        'only_matching': True,
    }]

    _LOGIN_URL = 'https://login.thisoldhouse.com/usernamepassword/login'

    def _perform_login(self, username, password):
        """Log in with the given credentials.

        Performs four requests in order: a HEAD request to the "insider" page,
        a request to ``wp-login.php``, a JSON POST of the credentials to
        ``_LOGIN_URL``, and a form POST of the hidden inputs from that
        response to the login callback endpoint.

        Raises:
            ExtractorError: with ``expected=True`` and the message
                'Invalid username or password' when the credential POST is
                answered with HTTP 401; any other ExtractorError is re-raised
                unchanged.
        """
        self._request_webpage(
            HEADRequest('https://www.thisoldhouse.com/insider'), None, 'Requesting session cookies')
        urlh = self._request_webpage(
            'https://www.thisoldhouse.com/wp-login.php', None, 'Requesting login info',
            errnote='Unable to login', query={'redirect_to': 'https://www.thisoldhouse.com/insider'})

        try:
            auth_form = self._download_webpage(
                self._LOGIN_URL, None, 'Submitting credentials', headers={
                    'Content-Type': 'application/json',
                    'Referer': urlh.url,
                }, data=json.dumps(filter_dict({
                    # Forward every query parameter of the URL we ended up on
                    # (first value only), renaming `client` to `client_id`
                    **{('client_id' if k == 'client' else k): v[0] for k, v in parse_qs(urlh.url).items()},
                    'tenant': 'thisoldhouse',
                    'username': username,
                    'password': password,
                    'popup_options': {},
                    'sso': True,
                    # try_call presumably yields None when the cookie is absent,
                    # and filter_dict presumably drops such entries -- verify
                    '_csrf': try_call(lambda: self._get_cookies(self._LOGIN_URL)['_csrf'].value),
                    '_intstate': 'deprecated',
                }), separators=(',', ':')).encode())
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise ExtractorError('Invalid username or password', expected=True)
            raise

        # The credential response is an HTML form; re-submit its hidden inputs
        self._request_webpage(
            'https://login.thisoldhouse.com/login/callback', None, 'Completing login',
            data=urlencode_postdata(self._hidden_inputs(auth_form)))

    def _real_extract(self, url):
        """Find the embedded player on the page and hand off to its extractor.

        Returns a ``url_result`` for ``ZypeIE`` when a Zype iframe is present,
        otherwise one for ``BrightcoveNewIE`` (with the page URL smuggled in
        as ``referrer``).  Calls ``raise_login_required`` when the page shows
        the subscriber paywall marker.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        if 'To Unlock This content' in webpage:
            self.raise_login_required(
                'This video is only available for subscribers. '
                'Note that --cookies-from-browser may not work due to this site using session cookies')

        # Zype embeds are optional, hence the (None, None) default
        video_url, video_id = self._search_regex(
            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
            webpage, 'zype url', group=(1, 2), default=(None, None))
        if video_url:
            # The on-site embed URL is only an alias; use the URL the HEAD request ends up on
            video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
            return self.url_result(video_url, ZypeIE, video_id)

        # No default here: a page without any known embed is an extraction error.
        # `www.` is optional in the second pattern, consistent with _VALID_URL
        # and the Zype pattern above (the group previously lacked its `?`).
        video_url, video_id = self._search_regex([
            r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.com/videos/brightcove/(\d+))'],
            webpage, 'iframe url', group=(1, 2))
        # Only the on-site alias (no `videoId` query parameter) needs resolving
        if not parse_qs(video_url).get('videoId'):
            video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
        # NOTE(review): the first pattern also accepts protocol-relative (`//...`)
        # URLs, which are passed on unresolved -- confirm BrightcoveNewIE handles them
        return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)
124 | if video_url: | |
125 | video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url | |
126 | return self.url_result(video_url, ZypeIE, video_id) | |
c76c9667 | 127 | |
0df63cce | 128 | video_url, video_id = self._search_regex([ |
129 | r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))', | |
130 | r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)thisoldhouse\.com/videos/brightcove/(\d+))'], | |
131 | webpage, 'iframe url', group=(1, 2)) | |
132 | if not parse_qs(video_url).get('videoId'): | |
133 | video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url | |
134 | return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id) |