]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/ruutu.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / ruutu.py
index d9cf39d712e547668c906cf68bdc2f25b32da094..2dddb39f95d411e0e17c68ac314827fe35785281 100644 (file)
@@ -1,13 +1,15 @@
-# coding: utf-8
-from __future__ import unicode_literals
+import json
+import re
+import urllib.parse
 
 from .common import InfoExtractor
-from ..compat import compat_urllib_parse_urlparse
 from ..utils import (
-    determine_ext,
     ExtractorError,
+    determine_ext,
     find_xpath_attr,
     int_or_none,
+    traverse_obj,
+    try_call,
     unified_strdate,
     url_or_none,
     xpath_attr,
@@ -36,6 +38,7 @@ class RuutuIE(InfoExtractor):
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'duration': 114,
                 'age_limit': 0,
+                'upload_date': '20150508',
             },
         },
         {
@@ -49,6 +52,9 @@ class RuutuIE(InfoExtractor):
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'duration': 40,
                 'age_limit': 0,
+                'upload_date': '20150507',
+                'series': 'Superpesis',
+                'categories': ['Urheilu'],
             },
         },
         {
@@ -61,6 +67,8 @@ class RuutuIE(InfoExtractor):
                 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe',
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'age_limit': 0,
+                'upload_date': '20151012',
+                'series': 'Läpivalaisu',
             },
         },
         # Episode where <SourceFile> is "NOT-USED", but has other
@@ -80,6 +88,9 @@ class RuutuIE(InfoExtractor):
                 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52',
                 'thumbnail': r're:^https?://.*\.jpg$',
                 'age_limit': 0,
+                'upload_date': '20190320',
+                'series': 'Mysteeritarinat',
+                'duration': 1324,
             },
             'expected_warnings': [
                 'HTTP Error 502: Bad Gateway',
@@ -123,11 +134,37 @@ class RuutuIE(InfoExtractor):
     ]
     _API_BASE = 'https://gatling.nelonenmedia.fi'
 
+    @classmethod
+    def _extract_embed_urls(cls, url, webpage):
+        # nelonen.fi
+        settings = try_call(
+            lambda: json.loads(re.search(
+                r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False))
+        if settings:
+            video_id = traverse_obj(settings, (
+                'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value'))
+            if video_id:
+                return [f'http://www.ruutu.fi/video/{video_id}']
+        # hs.fi and is.fi
+        settings = try_call(
+            lambda: json.loads(re.search(
+                '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>',
+                webpage).group(1), strict=False))
+        if settings:
+            video_ids = set(traverse_obj(settings, (
+                'props', 'pageProps', 'page', 'assetData', 'splitBody', ..., 'video', 'sourceId')) or [])
+            if video_ids:
+                return [f'http://www.ruutu.fi/video/{v}' for v in video_ids]
+            video_id = traverse_obj(settings, (
+                'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId'))
+            if video_id:
+                return [f'http://www.ruutu.fi/video/{video_id}']
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
         video_xml = self._download_xml(
-            '%s/media-xml-cache' % self._API_BASE, video_id,
+            f'{self._API_BASE}/media-xml-cache', video_id,
             query={'id': video_id})
 
         formats = []
@@ -145,8 +182,8 @@ def extract_formats(node):
                     processed_urls.append(video_url)
                     ext = determine_ext(video_url)
                     auth_video_url = url_or_none(self._download_webpage(
-                        '%s/auth/access/v2' % self._API_BASE, video_id,
-                        note='Downloading authenticated %s stream URL' % ext,
+                        f'{self._API_BASE}/auth/access/v2', video_id,
+                        note=f'Downloading authenticated {ext} stream URL',
                         fatal=False, query={'stream': video_url}))
                     if auth_video_url:
                         processed_urls.append(auth_video_url)
@@ -172,16 +209,16 @@ def extract_formats(node):
                             'vcodec': 'none',
                         })
                     else:
-                        proto = compat_urllib_parse_urlparse(video_url).scheme
+                        proto = urllib.parse.urlparse(video_url).scheme
                         if not child.tag.startswith('HTTP') and proto != 'rtmp':
                             continue
                         preference = -1 if proto == 'rtmp' else 1
                         label = child.get('label')
                         tbr = int_or_none(child.get('bitrate'))
-                        format_id = '%s-%s' % (proto, label if label else tbr) if label or tbr else proto
+                        format_id = f'{proto}-{label if label else tbr}' if label or tbr else proto
                         if not self._is_valid_url(video_url, video_id, format_id):
                             continue
-                        width, height = [int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]]
+                        width, height = (int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2])
                         formats.append({
                             'format_id': format_id,
                             'url': video_url,
@@ -194,10 +231,10 @@ def extract_formats(node):
         extract_formats(video_xml.find('./Clip'))
 
         def pv(name):
-            node = find_xpath_attr(
-                video_xml, './Clip/PassthroughVariables/variable', 'name', name)
-            if node is not None:
-                return node.get('value')
+            value = try_call(lambda: find_xpath_attr(
+                video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value'))
+            if value != 'NA':
+                return value or None
 
         if not formats:
             if (not self.get_param('allow_unplayable_formats')
@@ -205,9 +242,7 @@ def pv(name):
                 self.report_drm(video_id)
             ns_st_cds = pv('ns_st_cds')
             if ns_st_cds != 'free':
-                raise ExtractorError('This video is %s.' % ns_st_cds, expected=True)
-
-        self._sort_formats(formats)
+                raise ExtractorError(f'This video is {ns_st_cds}.', expected=True)
 
         themes = pv('themes')
 
@@ -222,6 +257,6 @@ def pv(name):
             'series': pv('series_name'),
             'season_number': int_or_none(pv('season_number')),
             'episode_number': int_or_none(pv('episode_number')),
-            'categories': themes.split(',') if themes else [],
+            'categories': themes.split(',') if themes else None,
             'formats': formats,
         }