]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/thisoldhouse.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / thisoldhouse.py
index 15f8380d3678a3a89fa0713595ad4f2271a9ac1d..fbc12d55d901df387af4de0ae2f8a0e63431aa15 100644 (file)
@@ -1,5 +1,6 @@
 import json
 
+from .brightcove import BrightcoveNewIE
 from .common import InfoExtractor
 from .zype import ZypeIE
 from ..networking import HEADRequest
@@ -8,6 +9,7 @@
     ExtractorError,
     filter_dict,
     parse_qs,
+    smuggle_url,
     try_call,
     urlencode_postdata,
 )
@@ -17,23 +19,43 @@ class ThisOldHouseIE(InfoExtractor):
     _NETRC_MACHINE = 'thisoldhouse'
     _VALID_URL = r'https?://(?:www\.)?thisoldhouse\.com/(?:watch|how-to|tv-episode|(?:[^/?#]+/)?\d+)/(?P<id>[^/?#]+)'
     _TESTS = [{
+        # Unresolved Brightcove URL embed (formerly Zype), free
         'url': 'https://www.thisoldhouse.com/furniture/21017078/how-to-build-a-storage-bench',
         'info_dict': {
-            'id': '5dcdddf673c3f956ef5db202',
+            'id': '6325298523112',
             'ext': 'mp4',
             'title': 'How to Build a Storage Bench',
             'description': 'In the workshop, Tom Silva and Kevin O\'Connor build a storage bench for an entryway.',
-            'timestamp': 1442548800,
-            'upload_date': '20150918',
-            'duration': 674,
-            'view_count': int,
-            'average_rating': 0,
-            'thumbnail': r're:^https?://.*\.jpg\?\d+$',
-            'display_id': 'how-to-build-a-storage-bench',
+            'timestamp': 1681793639,
+            'upload_date': '20230418',
+            'duration': 674.54,
+            'tags': 'count:11',
+            'uploader_id': '6314471934001',
+            'thumbnail': r're:^https?://.*\.jpg',
         },
         'params': {
             'skip_download': True,
         },
+    }, {
+        # Brightcove embed, authwalled
+        'url': 'https://www.thisoldhouse.com/glen-ridge-generational/99537/s45-e17-multi-generational',
+        'info_dict': {
+            'id': '6349675446112',
+            'ext': 'mp4',
+            'title': 'E17 | Glen Ridge Generational | Multi-Generational',
+            'description': 'md5:53c6bc2e8031f3033d693d9a3563222c',
+            'timestamp': 1711382202,
+            'upload_date': '20240325',
+            'duration': 1422.229,
+            'tags': 'count:13',
+            'uploader_id': '6314471934001',
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+        'expected_warnings': ['Login with password is not supported for this website'],
+        'params': {
+            'skip_download': True,
+        },
+        'skip': 'Requires subscription',
     }, {
         # Page no longer has video
         'url': 'https://www.thisoldhouse.com/watch/arlington-arts-crafts-arts-and-crafts-class-begins',
@@ -98,7 +120,15 @@ def _real_extract(self, url):
 
         video_url, video_id = self._search_regex(
             r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)?thisoldhouse\.(?:chorus\.build|com)/videos/zype/([0-9a-f]{24})[^\'"]*)[\'"]',
-            webpage, 'video url', group=(1, 2))
-        video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
+            webpage, 'zype url', group=(1, 2), default=(None, None))
+        if video_url:
+            video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Zype URL').url
+            return self.url_result(video_url, ZypeIE, video_id)
 
-        return self.url_result(video_url, ZypeIE, video_id)
+        video_url, video_id = self._search_regex([
+            r'<iframe[^>]+src=[\'"]((?:https?:)?//players\.brightcove\.net/\d+/\w+/index\.html\?videoId=(\d+))',
+            r'<iframe[^>]+src=[\'"]((?:https?:)?//(?:www\.)thisoldhouse\.com/videos/brightcove/(\d+))'],
+            webpage, 'iframe url', group=(1, 2))
+        if not parse_qs(video_url).get('videoId'):
+            video_url = self._request_webpage(HEADRequest(video_url), video_id, 'Resolving Brightcove URL').url
+        return self.url_result(smuggle_url(video_url, {'referrer': url}), BrightcoveNewIE, video_id)