jfr.im git - yt-dlp.git/commitdiff
[CBS] Add fallback (#579)
author: LE <redacted>
Mon, 2 Aug 2021 02:16:12 +0000 (22:16 -0400)
committer: GitHub <redacted>
Mon, 2 Aug 2021 02:16:12 +0000 (07:46 +0530)
Related: https://github.com/ytdl-org/youtube-dl/issues/29564
Authored-by: llacb47, pukkandan
yt_dlp/extractor/cbs.py
yt_dlp/extractor/common.py

index 716e945197b798e8efe551990416a07ae521c8cd..fbbbe5545bb4bf9653e6e33d53365b1a89242a78 100644 (file)
@@ -53,6 +53,54 @@ class CBSIE(CBSBaseIE):
             'skip_download': True,
         },
         '_skip': 'Blocked outside the US',
+    }, {
+        'url': 'https://www.paramountplus.com/shows/catdog/video/Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k/catdog-climb-every-catdog-the-canine-mutiny/',
+        'info_dict': {
+            'id': 'Oe44g5_NrlgiZE3aQVONleD6vXc8kP0k',
+            'ext': 'mp4',
+            'title': 'CatDog - Climb Every CatDog/The Canine Mutiny',
+            'description': 'md5:7ac835000645a69933df226940e3c859',
+            'duration': 1418,
+            'timestamp': 920264400,
+            'upload_date': '19990301',
+            'uploader': 'CBSI-NEW',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+        '_skip': 'Blocked outside the US',
+    }, {
+        'url': 'https://www.paramountplus.com/shows/tooning-out-the-news/video/6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd/7-23-21-week-in-review-rep-jahana-hayes-howard-fineman-sen-michael-bennet-sheera-frenkel-cecilia-kang-/',
+        'info_dict': {
+            'id': '6hSWYWRrR9EUTz7IEe5fJKBhYvSUfexd',
+            'ext': 'mp4',
+            'title': '7/23/21 WEEK IN REVIEW (Rep. Jahana Hayes/Howard Fineman/Sen. Michael Bennet/Sheera Frenkel & Cecilia Kang)',
+            'description': 'md5:f4adcea3e8b106192022e121f1565bae',
+            'duration': 2506,
+            'timestamp': 1627063200,
+            'upload_date': '20210723',
+            'uploader': 'CBSI-NEW',
+        },
+        'params': {
+            'skip_download': 'm3u8',
+        },
+        '_skip': 'Blocked outside the US',
+    }, {
+        'url': 'https://www.cbs.com/shows/the-late-show-with-stephen-colbert/video/60icOhMb9NcjbcWnF_gub9XXHdeBcNk2/the-late-show-6-23-21-christine-baranski-joy-oladokun-',
+        'info_dict': {
+            'id': '60icOhMb9NcjbcWnF_gub9XXHdeBcNk2',
+            'title': 'The Late Show - 6/23/21 (Christine Baranski, Joy Oladokun)',
+            'timestamp': 1624507140,
+            'description': 'md5:e01af24e95c74d55e8775aef86117b95',
+            'uploader': 'CBSI-NEW',
+            'upload_date': '20210624',
+        },
+        'params': {
+            'ignore_no_formats_error': True,
+            'skip_download': True,
+        },
+        'expected_warnings': [
+            'This content expired on', 'No video formats found', 'Requested format is not available'],
     }, {
         'url': 'http://colbertlateshow.com/video/8GmB0oY0McANFvp2aEffk9jZZZ2YyXxy/the-colbeard/',
         'only_matching': True,
@@ -79,17 +127,26 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
         asset_types = []
         subtitles = {}
         formats = []
+        useXMLmetadata = True
         last_e = None
         for item in items_data.findall('.//item'):
             asset_type = xpath_text(item, 'assetType')
-            if not asset_type or asset_type in asset_types or 'HLS_FPS' in asset_type or 'DASH_CENC' in asset_type:
-                continue
-            asset_types.append(asset_type)
             query = {
                 'mbr': 'true',
                 'assetTypes': asset_type,
             }
-            if asset_type.startswith('HLS') or asset_type in ('OnceURL', 'StreamPack'):
+            if not asset_type:
+                # fallback for content_ids that videoPlayerService doesn't return anything for
+                useXMLmetadata = False
+                asset_type = 'fallback'
+                query['formats'] = 'M3U+none,MPEG4,M3U+appleHlsEncryption,MP3'
+                del query['assetTypes']
+            elif asset_type in asset_types:
+                continue
+            elif any(excluded in asset_type for excluded in ('HLS_FPS', 'DASH_CENC', 'OnceURL')):
+                continue
+            asset_types.append(asset_type)
+            if asset_type.startswith('HLS') or 'StreamPack' in asset_type:
                 query['formats'] = 'MPEG4,M3U'
             elif asset_type in ('RTMP', 'WIFI', '3G'):
                 query['formats'] = 'MPEG4,FLV'
@@ -99,25 +156,37 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
                     'Downloading %s SMIL data' % asset_type)
             except ExtractorError as e:
                 last_e = e
-                continue
+                if useXMLmetadata:
+                    continue
+                query['formats'] = ''  # blank query to check if expired
+                try:
+                    tp_formats, tp_subtitles = self._extract_theplatform_smil(
+                        update_url_query(tp_release_url, query), content_id,
+                        'Downloading %s SMIL data, trying again with another format' % asset_type)
+                except ExtractorError as e:
+                    last_e = e
+                    continue
             formats.extend(tp_formats)
             subtitles = self._merge_subtitles(subtitles, tp_subtitles)
         if last_e and not formats:
-            raise last_e
+            self.raise_no_formats(last_e, True, content_id)
         self._sort_formats(formats)
 
         info = self._extract_theplatform_metadata(tp_path, content_id)
         info.update({
-            'id': content_id,
-            'title': title,
-            'series': xpath_text(video_data, 'seriesTitle'),
-            'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
-            'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
-            'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
-            'thumbnail': xpath_text(video_data, 'previewImageURL'),
             'formats': formats,
             'subtitles': subtitles,
+            'id': content_id
         })
+        if useXMLmetadata:
+            info.update({
+                'title': title,
+                'series': xpath_text(video_data, 'seriesTitle'),
+                'season_number': int_or_none(xpath_text(video_data, 'seasonNumber')),
+                'episode_number': int_or_none(xpath_text(video_data, 'episodeNumber')),
+                'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
+                'thumbnail': xpath_text(video_data, 'previewImageURL')
+            })
         return info
 
     def _real_extract(self, url):
index a3ac9dfb7d9f60bb1ebadb3dee012402f948383f..a4a5b37aaf096c66a56bcec7c47f5c2c754586bf 100644 (file)
@@ -1052,6 +1052,8 @@ def raise_geo_restricted(
     def raise_no_formats(self, msg, expected=False, video_id=None):
         if expected and self.get_param('ignore_no_formats_error'):
             self.report_warning(msg, video_id)
+        elif isinstance(msg, ExtractorError):
+            raise msg
         else:
             raise ExtractorError(msg, expected=expected, video_id=video_id)