[extractor/cbs] Add `ParamountPressExpress` extractor (#6604)

author bashonly <redacted>

Thu, 23 Mar 2023 16:18:42 +0000 (11:18 -0500)

committer GitHub <redacted>

Thu, 23 Mar 2023 16:18:42 +0000 (16:18 +0000)
author bashonly <redacted>
Thu, 23 Mar 2023 16:18:42 +0000 (11:18 -0500)
committer GitHub <redacted>
Thu, 23 Mar 2023 16:18:42 +0000 (16:18 +0000)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py

index 01281b5a15c480aa4c4d2cd30786e158581a9fc0..6c948e5fcee23851b4c85c3b6dce3e2e383430be 100644 (file)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -298,7 +298,10 @@
      CBCGemPlaylistIE,
      CBCGemLiveIE,
  )
-from .cbs import CBSIE
+from .cbs import (
+    CBSIE,
+    ParamountPressExpressIE,
+)
  from .cbslocal import (
      CBSLocalIE,
      CBSLocalArticleIE,
diff --git a/yt_dlp/extractor/brightcove.py b/yt_dlp/extractor/brightcove.py

index 2b7ddcae8d866bf2b3ff37a70d62a860a74a18c5..cd0e8ff275f3025a4d9d48d09e418ae5e199a654 100644 (file)
--- a/yt_dlp/extractor/brightcove.py
+++ b/yt_dlp/extractor/brightcove.py
@@ -575,6 +575,7 @@ def build_format_id(kind):
                  self.raise_no_formats(
                      error.get('message') or error.get('error_subcode') or error['error_code'], expected=True)
  
+        headers.pop('Authorization', None)  # or else http formats will give error 400
          for f in formats:
              f.setdefault('http_headers', {}).update(headers)
  
@@ -895,8 +896,9 @@ def extract_policy_key():
              store_pk(policy_key)
              return policy_key
  
-        api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id)
-        headers = {}
+        token = smuggled_data.get('token')
+        api_url = f'https://{"edge-auth" if token else "edge"}.api.brightcove.com/playback/v1/accounts/{account_id}/{content_type}s/{video_id}'
+        headers = {'Authorization': f'Bearer {token}'} if token else {}
          referrer = smuggled_data.get('referrer')  # XXX: notice the spelling/case of the key
          if referrer:
              headers.update({
diff --git a/yt_dlp/extractor/cbs.py b/yt_dlp/extractor/cbs.py

index 9aacd50c4565fe75bc43437dc695e96a3fb41ae5..1c0dbdea9444c370bf8d5b7811ad39ccc9905cf9 100644 (file)
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -1,8 +1,14 @@
+from .brightcove import BrightcoveNewIE
+from .common import InfoExtractor
  from .theplatform import ThePlatformFeedIE
+from .youtube import YoutubeIE
  from ..utils import (
      ExtractorError,
+    extract_attributes,
+    get_element_html_by_id,
      int_or_none,
      find_xpath_attr,
+    smuggle_url,
      xpath_element,
      xpath_text,
      update_url_query,
@@ -162,3 +168,110 @@ def _extract_video_info(self, content_id, site='cbs', mpx_acc=2198311517):
              'duration': int_or_none(xpath_text(video_data, 'videoLength'), 1000),
              'thumbnail': url_or_none(xpath_text(video_data, 'previewImageURL')),
          })
+
+
+class ParamountPressExpressIE(InfoExtractor):  # extractor for paramountpressexpress.com ".../video/?watch=<id>" pages
+    _VALID_URL = r'https?://(?:www\.)?paramountpressexpress\.com(?:/[\w-]+)+/(?P<yt>yt-)?video/?\?watch=(?P<id>[\w-]+)'  # "yt": optional "yt-" prefix of the "video" path segment; "id": value of ?watch=
+    _TESTS = [{  # first two cases use plain "video" URLs, last two use "yt-video" URLs
+        'url': 'https://www.paramountpressexpress.com/cbs-entertainment/shows/survivor/video/?watch=pnzew7e2hx',
+        'md5': '56631dbcadaab980d1fc47cb7b76cba4',
+        'info_dict': {
+            'id': '6322981580112',
+            'ext': 'mp4',
+            'title': 'I’m Felicia',
+            'description': 'md5:88fad93f8eede1c9c8f390239e4c6290',
+            'uploader_id': '6055873637001',
+            'upload_date': '20230320',
+            'timestamp': 1679334960,
+            'duration': 49.557,
+            'thumbnail': r're:^https://.+\.jpg',
+            'tags': [],
+        },
+    }, {
+        'url': 'https://www.paramountpressexpress.com/cbs-entertainment/video/?watch=2s5eh8kppc',
+        'md5': 'edcb03e3210b88a3e56c05aa863e0e5b',
+        'info_dict': {
+            'id': '6323036027112',
+            'ext': 'mp4',
+            'title': '‘Y&R’ Set Visit: Jerry O’Connell Quizzes Cast on Pre-Love Scene Rituals and More',
+            'description': 'md5:b929867a357aac5544b783d834c78383',
+            'uploader_id': '6055873637001',
+            'upload_date': '20230321',
+            'timestamp': 1679430180,
+            'duration': 132.032,
+            'thumbnail': r're:^https://.+\.jpg',
+            'tags': [],
+        },
+    }, {
+        'url': 'https://www.paramountpressexpress.com/paramount-plus/yt-video/?watch=OX9wJWOcqck',
+        'info_dict': {
+            'id': 'OX9wJWOcqck',
+            'ext': 'mp4',
+            'title': 'Rugrats | Season 2 Official Trailer | Paramount+',
+            'description': 'md5:1f7e26f5625a9f0d6564d9ad97a9f7de',
+            'uploader': 'Paramount Plus',
+            'uploader_id': '@paramountplus',
+            'uploader_url': 'http://www.youtube.com/@paramountplus',
+            'channel': 'Paramount Plus',
+            'channel_id': 'UCrRttZIypNTA1Mrfwo745Sg',
+            'channel_url': 'https://www.youtube.com/channel/UCrRttZIypNTA1Mrfwo745Sg',
+            'upload_date': '20230316',
+            'duration': 88,
+            'age_limit': 0,
+            'availability': 'public',
+            'live_status': 'not_live',
+            'playable_in_embed': True,
+            'view_count': int,
+            'like_count': int,
+            'channel_follower_count': int,
+            'thumbnail': 'https://i.ytimg.com/vi/OX9wJWOcqck/maxresdefault.jpg',
+            'categories': ['Entertainment'],
+            'tags': ['Rugrats'],
+        },
+    }, {
+        'url': 'https://www.paramountpressexpress.com/showtime/yt-video/?watch=_ljssSoDLkw',
+        'info_dict': {
+            'id': '_ljssSoDLkw',
+            'ext': 'mp4',
+            'title': 'Lavell Crawford: THEE Lavell Crawford Comedy Special Official Trailer | SHOWTIME',
+            'description': 'md5:39581bcc3fd810209b642609f448af70',
+            'uploader': 'SHOWTIME',
+            'uploader_id': '@Showtime',
+            'uploader_url': 'http://www.youtube.com/@Showtime',
+            'channel': 'SHOWTIME',
+            'channel_id': 'UCtwMWJr2BFPkuJTnSvCESSQ',
+            'channel_url': 'https://www.youtube.com/channel/UCtwMWJr2BFPkuJTnSvCESSQ',
+            'upload_date': '20230209',
+            'duration': 49,
+            'age_limit': 0,
+            'availability': 'public',
+            'live_status': 'not_live',
+            'playable_in_embed': True,
+            'view_count': int,
+            'like_count': int,
+            'comment_count': int,
+            'channel_follower_count': int,
+            'thumbnail': 'https://i.ytimg.com/vi_webp/_ljssSoDLkw/maxresdefault.webp',
+            'categories': ['People & Blogs'],
+            'tags': 'count:27',
+        },
+    }]
+
+    def _real_extract(self, url):  # returns a url_result that delegates to YoutubeIE or BrightcoveNewIE
+        display_id, is_youtube = self._match_valid_url(url).group('id', 'yt')  # the 'yt' group is optional in _VALID_URL
+        if is_youtube:
+            return self.url_result(display_id, YoutubeIE)  # the watch value itself is handed to YoutubeIE
+
+        webpage = self._download_webpage(url, display_id)
+        video_id = self._search_regex(  # digits from an inline `video_id = "<digits>",` assignment in the page
+            r'\bvideo_id\s*=\s*["\'](\d+)["\']\s*,', webpage, 'Brightcove ID')
+        token = self._search_regex(r'\btoken\s*=\s*["\']([\w.-]+)["\']', webpage, 'token')  # smuggled into the Brightcove URL below
+
+        player = extract_attributes(get_element_html_by_id('vcbrightcoveplayer', webpage) or '')  # '' is passed when the element lookup result is falsy
+        account_id = player.get('data-account') or '6055873637001'  # hard-coded fallback if the attribute is missing or empty
+        player_id = player.get('data-player') or 'OtLKgXlO9F'  # hard-coded fallback if the attribute is missing or empty
+        embed = player.get('data-embed') or 'default'  # hard-coded fallback if the attribute is missing or empty
+
+        return self.url_result(smuggle_url(  # player URL carries the token as smuggled data for BrightcoveNewIE
+            f'https://players.brightcove.net/{account_id}/{player_id}_{embed}/index.html?videoId={video_id}',
+            {'token': token}), BrightcoveNewIE)
author	bashonly <redacted>
	Thu, 23 Mar 2023 16:18:42 +0000 (11:18 -0500)
committer	GitHub <redacted>
	Thu, 23 Mar 2023 16:18:42 +0000 (16:18 +0000)
yt_dlp/extractor/_extractors.py		patch \| blob \| blame \| history
yt_dlp/extractor/brightcove.py		patch \| blob \| blame \| history
yt_dlp/extractor/cbs.py		patch \| blob \| blame \| history