jfr.im git - yt-dlp.git/commitdiff
[tvp] Add TVPStreamIE (#1401)
author: Lauren Liberda <redacted>
Sun, 31 Oct 2021 05:33:04 +0000 (11:03 +0530)
committer: pukkandan <redacted>
Wed, 10 Nov 2021 00:46:51 +0000 (06:16 +0530)
Authored by: selfisekai

yt_dlp/extractor/extractors.py
yt_dlp/extractor/tvp.py

index d47c06647642e70ab23c59e221b7fde76e1b1f91..4f9de71e27dff9dbc5f2aa628216be64fcb0fd62 100644 (file)
 from .tvp import (
     TVPEmbedIE,
     TVPIE,
+    TVPStreamIE,
     TVPWebsiteIE,
 )
 from .tvplay import (
index 22cfbd25e0fa86562f3e8cf8658c492ec07f4f92..48e2c6e764a454491f50a9e991c6bdee968b0c24 100644 (file)
@@ -251,6 +251,52 @@ def _real_extract(self, url):
         }
 
 
+class TVPStreamIE(InfoExtractor):
+    """Extractor for channel pages on tvpstream.vod.tvp.pl.
+
+    Accepts either a ``tvpstream:<channel id>`` pseudo-URL or a channel page
+    URL (the channel id may be omitted in both forms). The channel page is
+    scraped for the video id found in the player box, and the result is
+    delegated to the TVPEmbed extractor through a ``tvp:<video id>`` URL.
+    """
+    IE_NAME = 'tvp:stream'
+    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
+    _TESTS = [{
+        # untestable as "video" id changes many times across a day
+        'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
+        'only_matching': True,
+    }, {
+        'url': 'tvpstream:39821455',
+        'only_matching': True,
+    }, {
+        # the default stream when you provide no channel_id, most probably TVP Info
+        'url': 'tvpstream:',
+        'only_matching': True,
+    }, {
+        'url': 'https://tvpstream.vod.tvp.pl/',
+        'only_matching': True,
+    }]
+
+    # %s is 'channel' or 'video'; group 1 is the numeric data-<kind>-id
+    # attribute of the <div id="tvp_player_box"> element.
+    _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
+    # %s is the (regex-escaped) channel id. Each attribute has one group per
+    # quote style, of which only one can match:
+    #   1 / 2 - data-title in double / single quotes
+    #   3 / 4 - data-stationname in double / single quotes
+    _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'
+
+    def _real_extract(self, url):
+        """Resolve a channel URL to a ``url_transparent`` result for TVPEmbed.
+
+        Failures to download the page or to find the expected markup
+        propagate from the ``_download_webpage`` / ``_search_regex`` helpers
+        (their exact behaviour is defined outside this block).
+        """
+        channel_id = self._match_id(url)
+        # '%' binds tighter than 'or', so the fallback has to be parenthesised;
+        # unparenthesised, 'default' is never used and the URL ends in an
+        # empty "channel_id=".
+        request_id = channel_id or 'default'
+        channel_url = self._proto_relative_url(
+            '//tvpstream.vod.tvp.pl/?channel_id=%s' % request_id)
+        webpage = self._download_webpage(
+            channel_url, request_id, 'Downloading channel webpage')
+        if not channel_id:
+            # No id in the URL: take the one the page itself reports.
+            channel_id = self._search_regex(
+                self._PLAYER_BOX_RE % 'channel', webpage, 'default channel id')
+        video_id = self._search_regex(
+            self._PLAYER_BOX_RE % 'video', webpage, 'video id')
+        # Fetch all four groups: groups 1 and 2 are both *title* alternatives,
+        # so asking only for (1, 2) never yields the station name.
+        title_dq, title_sq, station_dq, station_sq = self._search_regex(
+            self._BUTTON_RE % re.escape(channel_id), webpage,
+            'audition title and station name',
+            group=(1, 2, 3, 4))
+        audition_title = title_dq if title_dq is not None else title_sq
+        station_name = station_dq if station_dq is not None else station_sq
+        return {
+            '_type': 'url_transparent',
+            'id': channel_id,
+            'url': 'tvp:%s' % video_id,
+            'title': audition_title,
+            'alt_title': station_name,
+            'is_live': True,
+            'ie_key': 'TVPEmbed',
+        }
+
+
 class TVPEmbedIE(InfoExtractor):
     IE_NAME = 'tvp:embed'
     IE_DESC = 'Telewizja Polska'