[extractors] Use new framework for existing embeds (#4307)

[yt-dlp.git] / yt_dlp / extractor / theplatform.py
diff --git a/yt_dlp/extractor/theplatform.py b/yt_dlp/extractor/theplatform.py

index c56b708b8208b5a08c0c87755c9a8ad3741c1f36..c8026d2941340b67c94ea3c2393dc99c2aa87594 100644 (file)
--- a/yt_dlp/extractor/theplatform.py
+++ b/yt_dlp/extractor/theplatform.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import re
  import time
  import hmac
@@ -10,15 +7,12 @@
  
  from .once import OnceIE
  from .adobepass import AdobePassIE
-from ..compat import (
-    compat_parse_qs,
-    compat_urllib_parse_urlparse,
-)
  from ..utils import (
      determine_ext,
      ExtractorError,
      float_or_none,
      int_or_none,
+    parse_qs,
      sanitized_Request,
      unsmuggle_url,
      update_url_query,
@@ -129,6 +123,13 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
          (?:https?://(?:link|player)\.theplatform\.com/[sp]/(?P<provider_id>[^/]+)/
             (?:(?:(?:[^/]+/)+select/)?(?P<media>media/(?:guid/\d+/)?)?|(?P<config>(?:[^/\?]+/(?:swf|config)|onsite)/select/))?
           |theplatform:)(?P<id>[^/\?&]+)'''
+    _EMBED_REGEX = [
+        r'''(?x)
+            <meta\s+
+                property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
+                content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2''',
+        r'(?s)<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//player\.theplatform\.com/p/.+?)\1'
+    ]
  
      _TESTS = [{
          # from http://www.metacafe.com/watch/cb-e9I_cZgTgIPd/blackberrys_big_bold_z30/
@@ -198,22 +199,11 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
      }]
  
      @classmethod
-    def _extract_urls(cls, webpage):
-        m = re.search(
-            r'''(?x)
-                    <meta\s+
-                        property=(["'])(?:og:video(?::(?:secure_)?url)?|twitter:player)\1\s+
-                        content=(["'])(?P<url>https?://player\.theplatform\.com/p/.+?)\2
-            ''', webpage)
-        if m:
-            return [m.group('url')]
-
+    def _extract_embed_urls(cls, url, webpage):
          # Are whitespaces ignored in URLs?
          # https://github.com/ytdl-org/youtube-dl/issues/12044
-        matches = re.findall(
-            r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
-        if matches:
-            return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
+        for embed_url in super()._extract_embed_urls(url, webpage):
+            yield re.sub(r'\s', '', embed_url)
  
      @staticmethod
      def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -250,7 +240,7 @@ def _real_extract(self, url):
              path += mobj.group('media')
          path += video_id
  
-        qs_dict = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        qs_dict = parse_qs(url)
          if 'guid' in qs_dict:
              webpage = self._download_webpage(url, video_id)
              scripts = re.findall(r'<script[^>]+src="([^"]+)"', webpage)
@@ -359,7 +349,7 @@ def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custo
              if first_video_id is None:
                  first_video_id = cur_video_id
                  duration = float_or_none(item.get('plfile$duration'))
-            file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
+            file_asset_types = item.get('plfile$assetTypes') or parse_qs(smil_url)['assetTypes']
              for asset_type in file_asset_types:
                  if asset_type in asset_types:
                      continue