jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/xtube.py
[extractor/youtube] Fix `live_status` extraction for playlist videos
[yt-dlp.git] / yt_dlp / extractor / xtube.py
index 98d2adb995b933f964bb619df9897b73d46ce21d..93a6a3f3353340950cfc395fed7b538875d6f310 100644 (file)
@@ -1,5 +1,3 @@
-from __future__ import unicode_literals
-
 import itertools
 import re
 
@@ -11,6 +9,7 @@
     parse_duration,
     sanitized_Request,
     str_to_int,
+    url_or_none,
 )
 
 
@@ -54,7 +53,7 @@ class XTubeIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        mobj = self._match_valid_url(url)
         video_id = mobj.group('id')
         display_id = mobj.group('display_id')
 
@@ -71,10 +70,10 @@ def _real_extract(self, url):
                 'Cookie': 'age_verified=1; cookiesAccepted=1',
             })
 
-        title, thumbnail, duration = [None] * 3
+        title, thumbnail, duration, sources, media_definition = [None] * 5
 
         config = self._parse_json(self._search_regex(
-            r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf)', webpage, 'config',
+            r'playerConf\s*=\s*({.+?})\s*,\s*(?:\n|loaderConf|playerWrapper)', webpage, 'config',
             default='{}'), video_id, transform_source=js_to_json, fatal=False)
         if config:
             config = config.get('mainRoll')
@@ -83,20 +82,52 @@ def _real_extract(self, url):
                 thumbnail = config.get('poster')
                 duration = int_or_none(config.get('duration'))
                 sources = config.get('sources') or config.get('format')
+                media_definition = config.get('mediaDefinition')
 
-        if not isinstance(sources, dict):
+        if not isinstance(sources, dict) and not media_definition:
             sources = self._parse_json(self._search_regex(
                 r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),',
                 webpage, 'sources', group='sources'), video_id,
                 transform_source=js_to_json)
 
         formats = []
-        for format_id, format_url in sources.items():
-            formats.append({
-                'url': format_url,
-                'format_id': format_id,
-                'height': int_or_none(format_id),
-            })
+        format_urls = set()
+
+        if isinstance(sources, dict):
+            for format_id, format_url in sources.items():
+                format_url = url_or_none(format_url)
+                if not format_url:
+                    continue
+                if format_url in format_urls:
+                    continue
+                format_urls.add(format_url)
+                formats.append({
+                    'url': format_url,
+                    'format_id': format_id,
+                    'height': int_or_none(format_id),
+                })
+
+        if isinstance(media_definition, list):
+            for media in media_definition:
+                video_url = url_or_none(media.get('videoUrl'))
+                if not video_url:
+                    continue
+                if video_url in format_urls:
+                    continue
+                format_urls.add(video_url)
+                format_id = media.get('format')
+                if format_id == 'hls':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                elif format_id == 'mp4':
+                    height = int_or_none(media.get('quality'))
+                    formats.append({
+                        'url': video_url,
+                        'format_id': '%s-%d' % (format_id, height) if height else format_id,
+                        'height': height,
+                    })
+
         self._remove_duplicate_formats(formats)
         self._sort_formats(formats)