[ie/matchtv] Fix extractor (#10190)

[yt-dlp.git] / yt_dlp / extractor / threeqsdn.py
diff --git a/yt_dlp/extractor/threeqsdn.py b/yt_dlp/extractor/threeqsdn.py

index bb7610352d58fdea9b51b86a7fe989c0a1d44b3e..45fdef9078f072c2cba6d0fd23a2e269d9ecfb6a 100644 (file)
--- a/yt_dlp/extractor/threeqsdn.py
+++ b/yt_dlp/extractor/threeqsdn.py
@@ -1,14 +1,11 @@
-from __future__ import unicode_literals
-
-import re
-
  from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
  from ..utils import (
-    determine_ext,
      ExtractorError,
+    determine_ext,
      float_or_none,
      int_or_none,
+    join_nonempty,
      parse_iso8601,
  )
  
@@ -17,6 +14,7 @@ class ThreeQSDNIE(InfoExtractor):
      IE_NAME = '3qsdn'
      IE_DESC = '3Q SDN'
      _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _EMBED_REGEX = [rf'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>{_VALID_URL}.*?)\1']
      _TESTS = [{
          # https://player.3qsdn.com/demo.html
          'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
@@ -77,12 +75,13 @@ class ThreeQSDNIE(InfoExtractor):
          'only_matching': True,
      }]
  
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
-        if mobj:
-            return mobj.group('url')
+    def _extract_from_webpage(self, url, webpage):
+        for res in super()._extract_from_webpage(url, webpage):
+            yield {
+                **res,
+                '_type': 'url_transparent',
+                'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+            }
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
@@ -91,7 +90,7 @@ def _real_extract(self, url):
              config = self._download_json(
                  url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
          except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                  self.raise_geo_restricted()
              raise
  
@@ -110,8 +109,7 @@ def _real_extract(self, url):
                  subtitles = self._merge_subtitles(subtitles, subs)
              elif source_type == 'hls':
                  fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
-                    m3u8_id='hls', fatal=False)
+                    source, video_id, 'mp4', live=live, m3u8_id='hls', fatal=False)
                  formats.extend(fmts)
                  subtitles = self._merge_subtitles(subtitles, subs)
              elif source_type == 'progressive':
@@ -119,29 +117,17 @@ def _real_extract(self, url):
                      src = s.get('src')
                      if not (src and self._is_valid_url(src, video_id)):
                          continue
-                    width = None
-                    format_id = ['http']
                      ext = determine_ext(src)
-                    if ext:
-                        format_id.append(ext)
                      height = int_or_none(s.get('height'))
-                    if height:
-                        format_id.append('%dp' % height)
-                        if aspect:
-                            width = int(height * aspect)
                      formats.append({
                          'ext': ext,
-                        'format_id': '-'.join(format_id),
+                        'format_id': join_nonempty('http', ext, height and f'{height}p'),
                          'height': height,
                          'source_preference': 0,
                          'url': src,
                          'vcodec': 'none' if height == 0 else None,
-                        'width': width,
+                        'width': int(height * aspect) if height and aspect else None,
                      })
-        # It seems like this would be correctly handled by default
-        # However, unless someone can confirm this, the old
-        # behaviour is being kept as-is
-        self._sort_formats(formats, ('res', 'source_preference'))
  
          for subtitle in (config.get('subtitles') or []):
              src = subtitle.get('src')
@@ -155,7 +141,7 @@ def _real_extract(self, url):
  
          return {
              'id': video_id,
-            'title': self._live_title(title) if live else title,
+            'title': title,
              'thumbnail': config.get('poster') or None,
              'description': config.get('description') or None,
              'timestamp': parse_iso8601(config.get('upload_date')),
@@ -163,4 +149,8 @@ def _real_extract(self, url):
              'is_live': live,
              'formats': formats,
              'subtitles': subtitles,
+            # It seems like this would be correctly handled by default
+            # However, unless someone can confirm this, the old
+            # behaviour is being kept as-is
+            '_format_sort_fields': ('res', 'source_preference'),
          }