]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/tvplay.py
[extractor/BiliIntlSeries] Fix `_VALID_URL`
[yt-dlp.git] / yt_dlp / extractor / tvplay.py
index 9b6d17f61936167509acbecb6adb3ba12f6e4d36..f815b5137910ffde50dbca3fea74ba3eefba4d71 100644 (file)
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
@@ -12,9 +9,9 @@
     determine_ext,
     ExtractorError,
     int_or_none,
-    parse_duration,
     parse_iso8601,
     qualities,
+    traverse_obj,
     try_get,
     update_url_query,
     url_or_none,
@@ -336,8 +333,8 @@ class ViafreeIE(InfoExtractor):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:www\.)?
-                        viafree\.(?P<country>dk|no|se)
-                        /(?P<id>program(?:mer)?/(?:[^/]+/)+[^/?#&]+)
+                        viafree\.(?P<country>dk|no|se|fi)
+                        /(?P<id>(?:program(?:mer)?|ohjelmat)?/(?:[^/]+/)+[^/?#&]+)
                     '''
     _TESTS = [{
         'url': 'http://www.viafree.no/programmer/underholdning/det-beste-vorspielet/sesong-2/episode-1',
@@ -369,7 +366,6 @@ class ViafreeIE(InfoExtractor):
             'upload_date': '20201217'
         },
         'params': {
-            'format': 'bestvideo',
             'skip_download': True
         }
     }, {
@@ -389,6 +385,9 @@ class ViafreeIE(InfoExtractor):
     }, {
         'url': 'http://www.viafree.se/program/underhallning/i-like-radio-live/sasong-1/676869',
         'only_matching': True,
+    }, {
+        'url': 'https://www.viafree.fi/ohjelmat/entertainment/amazing-makeovers/kausi-7/jakso-2',
+        'only_matching': True,
     }]
     _GEO_BYPASS = False
 
@@ -429,77 +428,96 @@ def _real_extract(self, url):
 
 
 class TVPlayHomeIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:tv3?)?play\.(?:tv3\.lt|skaties\.lv|tv3\.ee)/(?:[^/]+/)*[^/?#&]+-(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+            https?://
+            (?:tv3?)?
+            play\.(?:tv3|skaties)\.(?P<country>lv|lt|ee)/
+            (?P<live>lives/)?
+            [^?#&]+(?:episode|programme|clip)-(?P<id>\d+)
+    '''
     _TESTS = [{
-        'url': 'https://tvplay.tv3.lt/aferistai-n-7/aferistai-10047125/',
+        'url': 'https://play.tv3.lt/series/gauju-karai-karveliai,serial-2343791/serija-8,episode-2343828',
         'info_dict': {
-            'id': '366367',
+            'id': '2343828',
             'ext': 'mp4',
-            'title': 'Aferistai',
-            'description': 'Aferistai. Kalėdinė pasaka.',
-            'series': 'Aferistai [N-7]',
-            'season': '1 sezonas',
+            'title': 'Gaujų karai. Karveliai (2021) | S01E08: Serija 8',
+            'description': 'md5:f6fcfbb236429f05531131640dfa7c81',
+            'duration': 2710,
+            'season': 'Gaujų karai. Karveliai',
             'season_number': 1,
-            'duration': 464,
-            'timestamp': 1394209658,
-            'upload_date': '20140307',
-            'age_limit': 18,
+            'release_year': 2021,
+            'episode': 'Serija 8',
+            'episode_number': 8,
         },
         'params': {
-            'skip_download': True,
+            'skip_download': 'm3u8',
         },
     }, {
-        'url': 'https://tvplay.skaties.lv/vinas-melo-labak/vinas-melo-labak-10280317/',
-        'only_matching': True,
+        'url': 'https://play.tv3.lt/series/moterys-meluoja-geriau-n-7,serial-2574652/serija-25,episode-3284937',
+        'info_dict': {
+            'id': '3284937',
+            'ext': 'mp4',
+            'season': 'Moterys meluoja geriau [N-7]',
+            'season_number': 14,
+            'release_year': 2021,
+            'episode': 'Serija 25',
+            'episode_number': 25,
+            'title': 'Moterys meluoja geriau [N-7] (2021) | S14|E25: Serija 25',
+            'description': 'md5:c6926e9710f1a126f028fbe121eddb79',
+            'duration': 2440,
+        },
+        'skip': '404'
     }, {
-        'url': 'https://tvplay.tv3.ee/cool-d-ga-mehhikosse/cool-d-ga-mehhikosse-10044354/',
+        'url': 'https://play.tv3.lt/lives/tv6-lt,live-2838694/optibet-a-lygos-rungtynes-marijampoles-suduva--vilniaus-riteriai,programme-3422014',
         'only_matching': True,
     }, {
-        'url': 'https://play.tv3.lt/aferistai-10047125',
+        'url': 'https://tv3play.skaties.lv/series/women-lie-better-lv,serial-1024464/women-lie-better-lv,episode-1038762',
         'only_matching': True,
     }, {
-        'url': 'https://tv3play.skaties.lv/vinas-melo-labak-10280317',
+        'url': 'https://play.tv3.ee/series/_,serial-2654462/_,episode-2654474',
         'only_matching': True,
     }, {
-        'url': 'https://play.tv3.ee/cool-d-ga-mehhikosse-10044354',
+        'url': 'https://tv3play.skaties.lv/clips/tv3-zinas-valsti-lidz-15novembrim-bus-majsede,clip-3464509',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        country, is_live, video_id = self._match_valid_url(url).groups()
 
-        asset = self._download_json(
-            urljoin(url, '/sb/public/asset/' + video_id), video_id)
+        api_path = 'lives/programmes' if is_live else 'vods'
+        data = self._download_json(
+            urljoin(url, f'/api/products/{api_path}/{video_id}?platform=BROWSER&lang={country.upper()}'),
+            video_id)
 
-        m3u8_url = asset['movie']['contentUrl']
-        video_id = asset['assetId']
-        asset_title = asset['title']
-        title = asset_title['title']
-
-        formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
+        video_type = 'CATCHUP' if is_live else 'MOVIE'
+        stream_id = data['programRecordingId'] if is_live else video_id
+        stream = self._download_json(
+            urljoin(url, f'/api/products/{stream_id}/videos/playlist?videoType={video_type}&platform=BROWSER'), video_id)
+        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
+            stream['sources']['HLS'][0]['src'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
         self._sort_formats(formats)
 
-        thumbnails = None
-        image_url = asset.get('imageUrl')
-        if image_url:
-            thumbnails = [{
-                'url': urljoin(url, image_url),
-                'ext': 'jpg',
-            }]
-
-        metadata = asset.get('metadata') or {}
+        thumbnails = set(traverse_obj(
+            data, (('galary', 'images', 'artworks'), ..., ..., ('miniUrl', 'mainUrl')), expected_type=url_or_none))
 
         return {
             'id': video_id,
-            'title': title,
-            'description': asset_title.get('summaryLong') or asset_title.get('summaryShort'),
-            'thumbnails': thumbnails,
-            'duration': parse_duration(asset_title.get('runTime')),
-            'series': asset.get('tvSeriesTitle'),
-            'season': asset.get('tvSeasonTitle'),
-            'season_number': int_or_none(metadata.get('seasonNumber')),
-            'episode': asset_title.get('titleBrief'),
-            'episode_number': int_or_none(metadata.get('episodeNumber')),
+            'title': self._resolve_title(data),
+            'description': traverse_obj(data, 'description', 'lead'),
+            'duration': int_or_none(data.get('duration')),
+            'season': traverse_obj(data, ('season', 'serial', 'title')),
+            'season_number': int_or_none(traverse_obj(data, ('season', 'number'))),
+            'episode': data.get('title'),
+            'episode_number': int_or_none(data.get('episode')),
+            'release_year': int_or_none(traverse_obj(data, ('season', 'serial', 'year'))),
+            'thumbnails': [{'url': url, 'ext': 'jpg'} for url in thumbnails],
             'formats': formats,
+            'subtitles': subtitles,
         }
+
+    @staticmethod
+    def _resolve_title(data):
+        return try_get(data, lambda x: (
+            f'{data["season"]["serial"]["title"]} ({data["season"]["serial"]["year"]}) | '
+            f'S{data["season"]["number"]:02d}E{data["episode"]:02d}: {data["title"]}'
+        )) or data.get('title')