[extractor/clipchamp] Add extractor (#6978)

author bashonly <redacted>

Wed, 3 May 2023 20:46:37 +0000 (15:46 -0500)

committer GitHub <redacted>

Wed, 3 May 2023 20:46:37 +0000 (20:46 +0000)
author bashonly <redacted>
Wed, 3 May 2023 20:46:37 +0000 (15:46 -0500)
committer GitHub <redacted>
Wed, 3 May 2023 20:46:37 +0000 (20:46 +0000)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py

index 2d582f67f32da88a0631d2a150563a2807ecf2f8..974c8a2548d8ff6489539679cfb16b317f1abb75 100644 (file)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -356,6 +356,7 @@
  )
  from .ciscowebex import CiscoWebexIE
  from .cjsw import CJSWIE
+from .clipchamp import ClipchampIE
  from .cliphunter import CliphunterIE
  from .clippit import ClippitIE
  from .cliprs import ClipRsIE
diff --git a/yt_dlp/extractor/clipchamp.py b/yt_dlp/extractor/clipchamp.py

new file mode 100644 (file)

index 0000000..a8bdf7e
--- /dev/null
+++ b/yt_dlp/extractor/clipchamp.py
@@ -0,0 +1,61 @@
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    traverse_obj,
+    unified_timestamp,
+    url_or_none,
+)
+
+
+class ClipchampIE(InfoExtractor):  # extractor for clipchamp.com/watch/<id> pages
+    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
+    _TESTS = [{
+        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
+        'info_dict': {
+            'id': 'gRXZ4ZhdDaU',
+            'ext': 'mp4',
+            'title': 'Untitled video',
+            'uploader': 'Alexander Schwartz',
+            'timestamp': 1680805580,
+            'upload_date': '20230406',
+            'thumbnail': r're:^https?://.+\.jpg',
+        },
+        'params': {'skip_download': 'm3u8'},
+    }]
+
+    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'  # filled with (subdomain, path, 'mpd' or 'm3u8')
+    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}  # passed as query= to both manifest requests
+
+    def _real_extract(self, url):  # returns the info dict (id, formats, uploader, title, timestamp, thumbnail)
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
+
+        storage_location = data.get('storage_location')  # only 'cf_stream' is handled; anything else raises below
+        if storage_location != 'cf_stream':
+            raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
+
+        path = data['download_url']  # reused as the path segment of the iframe URL and of both manifest URLs
+        iframe = self._download_webpage(
+            f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
+        subdomain = self._search_regex(
+            r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
+            'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'  # hard-coded fallback when the regex yields nothing
+
+        formats = self._extract_mpd_formats(  # DASH manifest; presumably fatal=False lets extraction continue on failure
+            self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
+            query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
+        formats.extend(self._extract_m3u8_formats(  # HLS manifest, appended to the same formats list
+            self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
+            query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
+
+        return {  # 'uploader' falls back to None (via `or None`) when the joined creator name is empty
+            'id': video_id,
+            'formats': formats,
+            'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
+            **traverse_obj(data, {
+                'title': ('project', 'project_name', {str}),
+                'timestamp': ('created_at', {unified_timestamp}),
+                'thumbnail': ('thumbnail_url', {url_or_none}),
+            }),
+        }
author	bashonly <redacted>
	Wed, 3 May 2023 20:46:37 +0000 (15:46 -0500)
committer	GitHub <redacted>
	Wed, 3 May 2023 20:46:37 +0000 (20:46 +0000)
yt_dlp/extractor/_extractors.py		patch \| blob \| blame \| history
yt_dlp/extractor/clipchamp.py	[new file with mode: 0644]	patch \| blob