jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/adobetv.py
[ie/matchtv] Fix extractor (#10190)
[yt-dlp.git] / yt_dlp / extractor / adobetv.py
index 941254243f2124305db3cc64c226ebfb9ea07456..4608e5c13d86efe54f094ce3450eed49fb2d78d2 100644 (file)
@@ -2,13 +2,12 @@
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
 from ..utils import (
+    ISO639Utils,
+    OnDemandPagedList,
     float_or_none,
     int_or_none,
-    ISO639Utils,
     join_nonempty,
-    OnDemandPagedList,
     parse_duration,
     str_or_none,
     str_to_int,
@@ -36,7 +35,7 @@ def _parse_subtitles(self, video_data, url_key):
         return subtitles
 
     def _parse_video_data(self, video_data):
-        video_id = compat_str(video_data['id'])
+        video_id = str(video_data['id'])
         title = video_data['title']
 
         s3_extracted = False
@@ -70,7 +69,6 @@ def _parse_video_data(self, video_data):
                     })
                     s3_extracted = True
             formats.append(f)
-        self._sort_formats(formats)
 
         return {
             'id': video_id,
@@ -152,7 +150,7 @@ def _fetch_page(self, display_id, query, page):
         page += 1
         query['page'] = page
         for element_data in self._call_api(
-                self._RESOURCE, display_id, query, 'Download Page %d' % page):
+                self._RESOURCE, display_id, query, f'Download Page {page}'):
             yield self._process_data(element_data)
 
     def _extract_playlist_entries(self, display_id, query):
@@ -232,6 +230,7 @@ def _real_extract(self, url):
 class AdobeTVVideoIE(AdobeTVBaseIE):
     IE_NAME = 'adobetv:video'
     _VALID_URL = r'https?://video\.tv\.adobe\.com/v/(?P<id>\d+)'
+    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)?//video\.tv\.adobe\.com/v/\d+[^"]+)[\'"]']
 
     _TEST = {
         # From https://helpx.adobe.com/acrobat/how-to/new-experience-acrobat-dc.html?set=acrobat--get-started--essential-beginners
@@ -268,7 +267,6 @@ def _real_extract(self, url):
                 'width': int_or_none(source.get('width') or None),
                 'url': source_src,
             })
-        self._sort_formats(formats)
 
         # For both metadata and downloaded files the duration varies among
         # formats. I just pick the max one