[extractor/youtube] Fix `live_status` extraction for playlist videos

[yt-dlp.git] / yt_dlp / extractor / mtv.py
diff --git a/yt_dlp/extractor/mtv.py b/yt_dlp/extractor/mtv.py

index 141dd7deb364db02f59d07f51d5c8ee5d679f738..10cd304eb0b011ab02376e2fdd424e48441e9ed5 100644 (file)
--- a/yt_dlp/extractor/mtv.py
+++ b/yt_dlp/extractor/mtv.py
@@ -1,13 +1,7 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import re
  
  from .common import InfoExtractor
-from ..compat import (
-    compat_str,
-    compat_xpath,
-)
+from ..compat import compat_str
  from ..utils import (
      ExtractorError,
      find_xpath_attr,
@@ -15,6 +9,7 @@
      float_or_none,
      HEADRequest,
      int_or_none,
+    join_nonempty,
      RegexNotFoundError,
      sanitized_Request,
      strip_or_none,
@@ -99,9 +94,9 @@ def _extract_video_formats(self, mdoc, mtvn_id, video_id):
                      formats.extend([{
                          'ext': 'flv' if rtmp_video_url.startswith('rtmp') else ext,
                          'url': rtmp_video_url,
-                        'format_id': '-'.join(filter(None, [
+                        'format_id': join_nonempty(
                              'rtmp' if rtmp_video_url.startswith('rtmp') else None,
-                            rendition.get('bitrate')])),
+                            rendition.get('bitrate')),
                          'width': int(rendition.get('width')),
                          'height': int(rendition.get('height')),
                      }])
@@ -166,9 +161,9 @@ def _get_video_info(self, itemdoc, use_hls=True):
                  itemdoc, './/{http://search.yahoo.com/mrss/}category',
                  'scheme', 'urn:mtvn:video_title')
          if title_el is None:
-            title_el = itemdoc.find(compat_xpath('.//{http://search.yahoo.com/mrss/}title'))
+            title_el = itemdoc.find('.//{http://search.yahoo.com/mrss/}title')
          if title_el is None:
-            title_el = itemdoc.find(compat_xpath('.//title'))
+            title_el = itemdoc.find('.//title')
              if title_el.text is None:
                  title_el = None
  
@@ -305,21 +300,23 @@ def _extract_mgid(self, webpage):
          if not mgid:
              mgid = self._extract_triforce_mgid(webpage)
  
-        if not mgid:
-            mgid = self._search_regex(
-                r'"videoConfig":{"videoId":"(mgid:.*?)"', webpage, 'mgid', default=None)
-
-        if not mgid:
-            mgid = self._search_regex(
-                r'"media":{"video":{"config":{"uri":"(mgid:.*?)"', webpage, 'mgid', default=None)
-
          if not mgid:
              data = self._parse_json(self._search_regex(
                  r'__DATA__\s*=\s*({.+?});', webpage, 'data'), None)
              main_container = self._extract_child_with_type(data, 'MainContainer')
              ab_testing = self._extract_child_with_type(main_container, 'ABTesting')
              video_player = self._extract_child_with_type(ab_testing or main_container, 'VideoPlayer')
-            mgid = video_player['props']['media']['video']['config']['uri']
+            if video_player:
+                mgid = try_get(video_player, lambda x: x['props']['media']['video']['config']['uri'])
+            else:
+                flex_wrapper = self._extract_child_with_type(ab_testing or main_container, 'FlexWrapper')
+                auth_suite_wrapper = self._extract_child_with_type(flex_wrapper, 'AuthSuiteWrapper')
+                player = self._extract_child_with_type(auth_suite_wrapper or flex_wrapper, 'Player')
+                if player:
+                    mgid = try_get(player, lambda x: x['props']['videoDetail']['mgid'])
+
+        if not mgid:
+            raise ExtractorError('Could not extract mgid')
  
          return mgid
  
@@ -334,6 +331,7 @@ def _real_extract(self, url):
  class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
      IE_NAME = 'mtvservices:embedded'
      _VALID_URL = r'https?://media\.mtvnservices\.com/embed/(?P<mgid>.+?)(\?|/|$)'
+    _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1']
  
      _TEST = {
          # From http://www.thewrap.com/peter-dinklage-sums-up-game-of-thrones-in-45-seconds-video/
@@ -349,13 +347,6 @@ class MTVServicesEmbeddedIE(MTVServicesInfoExtractor):
          },
      }
  
-    @staticmethod
-    def _extract_url(webpage):
-        mobj = re.search(
-            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//media\.mtvnservices\.com/embed/.+?)\1', webpage)
-        if mobj:
-            return mobj.group('url')
-
      def _get_feed_url(self, uri, url=None):
          video_id = self._id_from_uri(uri)
          config = self._download_json(