jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/threeqsdn.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / threeqsdn.py
index bb7610352d58fdea9b51b86a7fe989c0a1d44b3e..45fdef9078f072c2cba6d0fd23a2e269d9ecfb6a 100644 (file)
@@ -1,14 +1,11 @@
-from __future__ import unicode_literals
-
-import re
-
 from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
 from ..utils import (
-    determine_ext,
     ExtractorError,
+    determine_ext,
     float_or_none,
     int_or_none,
+    join_nonempty,
     parse_iso8601,
 )
 
@@ -17,6 +14,7 @@ class ThreeQSDNIE(InfoExtractor):
     IE_NAME = '3qsdn'
     IE_DESC = '3Q SDN'
     _VALID_URL = r'https?://playout\.3qsdn\.com/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _EMBED_REGEX = [rf'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>{_VALID_URL}.*?)\1']
     _TESTS = [{
         # https://player.3qsdn.com/demo.html
         'url': 'https://playout.3qsdn.com/7201c779-6b3c-11e7-a40e-002590c750be',
@@ -77,12 +75,13 @@ class ThreeQSDNIE(InfoExtractor):
         'only_matching': True,
     }]
 
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+\b(?:data-)?src=(["\'])(?P<url>%s.*?)\1' % ThreeQSDNIE._VALID_URL, webpage)
-        if mobj:
-            return mobj.group('url')
+    def _extract_from_webpage(self, url, webpage):
+        for res in super()._extract_from_webpage(url, webpage):
+            yield {
+                **res,
+                '_type': 'url_transparent',
+                'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
+            }
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
@@ -91,7 +90,7 @@ def _real_extract(self, url):
             config = self._download_json(
                 url.replace('://playout.3qsdn.com/', '://playout.3qsdn.com/config/'), video_id)
         except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                 self.raise_geo_restricted()
             raise
 
@@ -110,8 +109,7 @@ def _real_extract(self, url):
                 subtitles = self._merge_subtitles(subtitles, subs)
             elif source_type == 'hls':
                 fmts, subs = self._extract_m3u8_formats_and_subtitles(
-                    source, video_id, 'mp4', 'm3u8' if live else 'm3u8_native',
-                    m3u8_id='hls', fatal=False)
+                    source, video_id, 'mp4', live=live, m3u8_id='hls', fatal=False)
                 formats.extend(fmts)
                 subtitles = self._merge_subtitles(subtitles, subs)
             elif source_type == 'progressive':
@@ -119,29 +117,17 @@ def _real_extract(self, url):
                     src = s.get('src')
                     if not (src and self._is_valid_url(src, video_id)):
                         continue
-                    width = None
-                    format_id = ['http']
                     ext = determine_ext(src)
-                    if ext:
-                        format_id.append(ext)
                     height = int_or_none(s.get('height'))
-                    if height:
-                        format_id.append('%dp' % height)
-                        if aspect:
-                            width = int(height * aspect)
                     formats.append({
                         'ext': ext,
-                        'format_id': '-'.join(format_id),
+                        'format_id': join_nonempty('http', ext, height and f'{height}p'),
                         'height': height,
                         'source_preference': 0,
                         'url': src,
                         'vcodec': 'none' if height == 0 else None,
-                        'width': width,
+                        'width': int(height * aspect) if height and aspect else None,
                     })
-        # It seems like this would be correctly handled by default
-        # However, unless someone can confirm this, the old
-        # behaviour is being kept as-is
-        self._sort_formats(formats, ('res', 'source_preference'))
 
         for subtitle in (config.get('subtitles') or []):
             src = subtitle.get('src')
@@ -155,7 +141,7 @@ def _real_extract(self, url):
 
         return {
             'id': video_id,
-            'title': self._live_title(title) if live else title,
+            'title': title,
             'thumbnail': config.get('poster') or None,
             'description': config.get('description') or None,
             'timestamp': parse_iso8601(config.get('upload_date')),
@@ -163,4 +149,8 @@ def _real_extract(self, url):
             'is_live': live,
             'formats': formats,
             'subtitles': subtitles,
+            # It seems like this would be correctly handled by default
+            # However, unless someone can confirm this, the old
+            # behaviour is being kept as-is
+            '_format_sort_fields': ('res', 'source_preference'),
         }