]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/expressen.py
[ie/mlbtv] Fix extraction (#10296)
[yt-dlp.git] / yt_dlp / extractor / expressen.py
index dc8b855d2332521b505a53a49737e9364296876e..33b829845bdba0f795015b295753cf45cd18a389 100644 (file)
@@ -1,8 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
@@ -16,15 +11,17 @@ class ExpressenIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?(?:expressen|di)\.se/
-                        (?:(?:tvspelare/video|videoplayer/embed)/)?
-                        tv/(?:[^/]+/)*
+                        (?:(?:tvspelare/video|video-?player/embed)/)?
+                        (?:tv|nyheter)/(?:[^/?#]+/)*
                         (?P<id>[^/?#&]+)
                     '''
+    _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1']
     _TESTS = [{
         'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
-        'md5': '2fbbe3ca14392a6b1b36941858d33a45',
+        'md5': 'deb2ca62e7b1dcd19fa18ba37523f66e',
         'info_dict': {
-            'id': '8690962',
+            'id': 'ba90f5a9-78d1-4511-aa02-c177b9c99136',
+            'display_id': 'ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden',
             'ext': 'mp4',
             'title': 'Ledarsnack: Om arbetslösheten bland kvinnor i speciellt utsatta områden',
             'description': 'md5:f38c81ff69f3de4d269bbda012fcbbba',
@@ -45,15 +42,14 @@ class ExpressenIE(InfoExtractor):
     }, {
         'url': 'https://www.di.se/videoplayer/embed/tv/ditv/borsmorgon/implantica-rusar-70--under-borspremiaren-hor-styrelsemedlemmen/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
         'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/video-player/embed/tv/nyheter/ekero-fodda-olof-gustafsson-forvaltar-knarkbaronen-pablo-escobars-namn',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.expressen.se/nyheter/efter-egna-telefonbluffen-escobar-stammer-klarna/',
+        'only_matching': True,
     }]
 
-    @staticmethod
-    def _extract_urls(webpage):
-        return [
-            mobj.group('url') for mobj in re.finditer(
-                r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?(?:expressen|di)\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
-                webpage)]
-
     def _real_extract(self, url):
         display_id = self._match_id(url)
 
@@ -62,12 +58,12 @@ def _real_extract(self, url):
         def extract_data(name):
             return self._parse_json(
                 self._search_regex(
-                    r'data-%s=(["\'])(?P<value>(?:(?!\1).)+)\1' % name,
+                    rf'data-{name}=(["\'])(?P<value>(?:(?!\1).)+)\1',
                     webpage, 'info', group='value'),
                 display_id, transform_source=unescapeHTML)
 
         info = extract_data('video-tracking-info')
-        video_id = info['videoId']
+        video_id = info['contentId']
 
         data = extract_data('article-data')
         stream = data['stream']
@@ -80,7 +76,6 @@ def extract_data(name):
             formats = [{
                 'url': stream,
             }]
-        self._sort_formats(formats)
 
         title = info.get('titleRaw') or data['title']
         description = info.get('descriptionRaw')