[ie/mlbtv] Fix extraction (#10296)

[yt-dlp.git] / yt_dlp / extractor / vidlii.py
diff --git a/yt_dlp/extractor/vidlii.py b/yt_dlp/extractor/vidlii.py

index a63919ff2409f3424be5d5312b3ced8d98f8003d..d9e33ca9e58b9f60235ee1b79aa25918503cd350 100644 (file)
--- a/yt_dlp/extractor/vidlii.py
+++ b/yt_dlp/extractor/vidlii.py
@@ -1,13 +1,10 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import re
  
  from .common import InfoExtractor
+from ..networking import HEADRequest
  from ..utils import (
-    HEADRequest,
-    format_field,
      float_or_none,
+    format_field,
      get_element_by_id,
      int_or_none,
      str_to_int,
@@ -37,7 +34,7 @@ class VidLiiIE(InfoExtractor):
              'average_rating': float,
              'categories': ['News & Politics'],
              'tags': ['Vidlii', 'Jan', 'Videogames'],
-        }
+        },
      }, {
          'url': 'https://www.vidlii.com/watch?v=zTAtaAgOLKt',
          'md5': '5778f7366aa4c569b77002f8bf6b614f',
@@ -66,13 +63,14 @@ def _real_extract(self, url):
          video_id = self._match_id(url)
  
          webpage = self._download_webpage(
-            'https://www.vidlii.com/watch?v=%s' % video_id, video_id)
+            f'https://www.vidlii.com/watch?v={video_id}', video_id)
          formats = []
  
          sources = [source[1] for source in re.findall(
              r'src\s*:\s*(["\'])(?P<url>(?:https?://)?(?:(?!\1).)+)\1',
              webpage) or []]
          for source in sources:
+            source = urljoin(url, source)
              height = int(self._search_regex(r'(\d+).mp4', source, 'height', default=360))
              if self._request_webpage(HEADRequest(source), video_id, f'Checking {height}p url', errnote=False):
                  formats.append({
@@ -80,7 +78,6 @@ def _real_extract(self, url):
                      'format_id': f'{height}p',
                      'height': height,
                  })
-        self._sort_formats(formats)
  
          title = self._search_regex(
              (r'<h1>([^<]+)</h1>', r'<title>([^<]+) - VidLii<'), webpage,
@@ -103,7 +100,7 @@ def _real_extract(self, url):
          uploader = self._search_regex(
              r'<div[^>]+class=["\']wt_person[^>]+>\s*<a[^>]+\bhref=["\']/user/[^>]+>([^<]+)',
              webpage, 'uploader', fatal=False)
-        uploader_url = format_field(uploader, template='https://www.vidlii.com/user/%s')
+        uploader_url = format_field(uploader, None, 'https://www.vidlii.com/user/%s')
  
          upload_date = unified_strdate(self._html_search_meta(
              'datePublished', webpage, default=None) or self._search_regex(