[extractor/camtasia] Separate into own extractor (#4307)

author pukkandan <redacted>

Mon, 1 Aug 2022 19:30:55 +0000 (01:00 +0530)

committer pukkandan <redacted>

Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
author pukkandan <redacted>
Mon, 1 Aug 2022 19:30:55 +0000 (01:00 +0530)
committer pukkandan <redacted>
Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py

index b105437c334598999fdf36024fb969481c85f8b1..b62b8113c12b9d4fdc360c09ec2f999c456b2448 100644 (file)
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -219,6 +219,7 @@
      CamdemyFolderIE
  )
  from .cammodels import CamModelsIE
+from .camtasia import CamtasiaEmbedIE
  from .camwithher import CamWithHerIE
  from .canalalpha import CanalAlphaIE
  from .canalplus import CanalplusIE
diff --git a/yt_dlp/extractor/camtasia.py b/yt_dlp/extractor/camtasia.py

new file mode 100644 (file)

index 0000000..70ab6c6
--- /dev/null
+++ b/yt_dlp/extractor/camtasia.py
@@ -0,0 +1,71 @@
+import os
+import urllib.parse
+
+from .common import InfoExtractor
+from ..utils import float_or_none
+
+
+class CamtasiaEmbedIE(InfoExtractor):
+    _VALID_URL = False
+    _WEBPAGE_TESTS = [
+        {
+            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
+            'playlist': [{
+                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
+                'info_dict': {
+                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
+                    'ext': 'flv',
+                    'duration': 2235.90,
+                }
+            }, {
+                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
+                'info_dict': {
+                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
+                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
+                    'ext': 'flv',
+                    'duration': 2235.93,
+                }
+            }],
+            'info_dict': {
+                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
+            },
+            'skip': 'webpage dead'
+        },
+
+    ]
+
+    def _extract_from_webpage(self, url, webpage):
+        camtasia_cfg = self._search_regex(
+            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
+            webpage, 'camtasia configuration file', default=None)
+        if camtasia_cfg is None:
+            return None
+
+        title = self._html_search_meta('DC.title', webpage, fatal=True)
+
+        camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
+        camtasia_cfg = self._download_xml(
+            camtasia_url, self._generic_id(url),
+            note='Downloading camtasia configuration',
+            errnote='Failed to download camtasia configuration')
+        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
+
+        entries = []
+        for n in fileset_node.getchildren():
+            url_n = n.find('./uri')
+            if url_n is None:
+                continue
+
+            entries.append({
+                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
+                'title': f'{title} - {n.tag}',
+                'url': urllib.parse.urljoin(url, url_n.text),
+                'duration': float_or_none(n.find('./duration').text),
+            })
+
+        return {
+            '_type': 'playlist',
+            'entries': entries,
+            'title': title,
+        }
diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py

index 0dc9ae0da6c956c7cbe5c4bda6e64f545d707e88..3d574cd0224775a5e91be47aba56f40c1ac304df 100644 (file)
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -933,30 +933,6 @@ class GenericIE(InfoExtractor):
                  'skip_download': True,
              }
          },
-        # Camtasia studio
-        {
-            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
-            'playlist': [{
-                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
-                'info_dict': {
-                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
-                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
-                    'ext': 'flv',
-                    'duration': 2235.90,
-                }
-            }, {
-                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
-                'info_dict': {
-                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
-                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
-                    'ext': 'flv',
-                    'duration': 2235.93,
-                }
-            }],
-            'info_dict': {
-                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
-            }
-        },
          # Flowplayer
          {
              'url': 'http://www.handjobhub.com/video/busty-blonde-siri-tit-fuck-while-wank-6313.html',
@@ -2680,43 +2656,6 @@ def itunes(key):
              'entries': entries,
          }
  
-    def _extract_camtasia(self, url, video_id, webpage):
-        """ Returns None if no camtasia video can be found. """
-
-        camtasia_cfg = self._search_regex(
-            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
-            webpage, 'camtasia configuration file', default=None)
-        if camtasia_cfg is None:
-            return None
-
-        title = self._html_search_meta('DC.title', webpage, fatal=True)
-
-        camtasia_url = urllib.parse.urljoin(url, camtasia_cfg)
-        camtasia_cfg = self._download_xml(
-            camtasia_url, video_id,
-            note='Downloading camtasia configuration',
-            errnote='Failed to download camtasia configuration')
-        fileset_node = camtasia_cfg.find('./playlist/array/fileset')
-
-        entries = []
-        for n in fileset_node.getchildren():
-            url_n = n.find('./uri')
-            if url_n is None:
-                continue
-
-            entries.append({
-                'id': os.path.splitext(url_n.text.rpartition('/')[2])[0],
-                'title': f'{title} - {n.tag}',
-                'url': urllib.parse.urljoin(url, url_n.text),
-                'duration': float_or_none(n.find('./duration').text),
-            })
-
-        return {
-            '_type': 'playlist',
-            'entries': entries,
-            'title': title,
-        }
-
      def _kvs_getrealurl(self, video_url, license_code):
          if not video_url.startswith('function/0/'):
              return video_url  # not obfuscated
@@ -2920,12 +2859,6 @@ def _real_extract(self, url):
          except xml.etree.ElementTree.ParseError:
              pass
  
-        # Is it a Camtasia project?
-        camtasia_res = self._extract_camtasia(url, video_id, webpage)
-        if camtasia_res is not None:
-            self.report_detected('Camtasia video')
-            return camtasia_res
-
          info_dict.update({
              # it's tempting to parse this further, but you would
              # have to take into account all the variations like
author	pukkandan <redacted>
	Mon, 1 Aug 2022 19:30:55 +0000 (01:00 +0530)
committer	pukkandan <redacted>
	Mon, 1 Aug 2022 19:38:16 +0000 (01:08 +0530)
yt_dlp/extractor/_extractors.py		patch | blob | blame | history
yt_dlp/extractor/camtasia.py	[new file with mode: 0644]	patch | blob
yt_dlp/extractor/generic.py		patch | blob | blame | history