[extractor/tvp] Support `stream.tvp.pl` (#6139)

author lauren n. liberda <redacted>

Sun, 12 Feb 2023 04:43:10 +0000 (05:43 +0100)

committer GitHub <redacted>

Sun, 12 Feb 2023 04:43:10 +0000 (10:13 +0530)
author lauren n. liberda <redacted>
Sun, 12 Feb 2023 04:43:10 +0000 (05:43 +0100)
committer GitHub <redacted>
Sun, 12 Feb 2023 04:43:10 +0000 (10:13 +0530)
diff --git a/yt_dlp/extractor/tvp.py b/yt_dlp/extractor/tvp.py

index 8483564f74b398dfeb71c95466a8c2e416ebb9a7..f8ded2646320a0a02ff90f340cb43e3b00da0c13 100644 (file)
--- a/yt_dlp/extractor/tvp.py
+++ b/yt_dlp/extractor/tvp.py
@@ -268,8 +268,11 @@ def _real_extract(self, url):
  
  class TVPStreamIE(InfoExtractor):
      IE_NAME = 'tvp:stream'
-    _VALID_URL = r'(?:tvpstream:|https?://tvpstream\.vod\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
+    _VALID_URL = r'(?:tvpstream:|https?://(?:tvpstream\.vod|stream)\.tvp\.pl/(?:\?(?:[^&]+[&;])*channel_id=)?)(?P<id>\d*)'
      _TESTS = [{
+        'url': 'https://stream.tvp.pl/?channel_id=56969941',
+        'only_matching': True,
+    }, {
          # untestable as "video" id changes many times across a day
          'url': 'https://tvpstream.vod.tvp.pl/?channel_id=1455',
          'only_matching': True,
@@ -285,28 +288,32 @@ class TVPStreamIE(InfoExtractor):
          'only_matching': True,
      }]
  
-    _PLAYER_BOX_RE = r'<div\s[^>]*id\s*=\s*["\']?tvp_player_box["\']?[^>]+data-%s-id\s*=\s*["\']?(\d+)'
-    _BUTTON_RE = r'<div\s[^>]*data-channel-id=["\']?%s["\']?[^>]*\sdata-title=(?:"([^"]*)"|\'([^\']*)\')[^>]*\sdata-stationname=(?:"([^"]*)"|\'([^\']*)\')'
-
      def _real_extract(self, url):
+        """Resolve a channel page to the `tvp:<video_id>` URL of its live audition.
+
+        An empty id in `url` selects the first channel listed on the page.
+        The result is `url_transparent` and is handed over to `TVPEmbed`.
+        """
          channel_id = self._match_id(url)
-        channel_url = self._proto_relative_url('//tvpstream.vod.tvp.pl/?channel_id=%s' % channel_id or 'default')
-        webpage = self._download_webpage(channel_url, channel_id, 'Downloading channel webpage')
-        if not channel_id:
-            channel_id = self._search_regex(self._PLAYER_BOX_RE % 'channel',
-                                            webpage, 'default channel id')
-        video_id = self._search_regex(self._PLAYER_BOX_RE % 'video',
-                                      webpage, 'video id')
-        audition_title, station_name = self._search_regex(
-            self._BUTTON_RE % (re.escape(channel_id)), webpage,
-            'audition title and station name',
-            group=(1, 2))
+        display_id = channel_id or 'default'
+        # `%` binds tighter than `or`, so the former `'...%s' % channel_id or 'default'`
+        # never reached its fallback; build the same URL without the dead branch
+        channel_url = self._proto_relative_url('//stream.tvp.pl/?channel_id=%s' % channel_id)
+        webpage = self._download_webpage(channel_url, display_id, 'Downloading channel webpage')
+        channels = self._search_json(
+            r'window\.__channels\s*=', webpage, 'channel list', display_id,
+            contains_pattern=r'\[\s*{(?s:.+)}\s*]')
+        channel = traverse_obj(channels, (lambda _, v: channel_id == str(v['id'])), get_all=False) if channel_id else channels[0]
+        # NOTE(review): an unknown channel id or a channel with nothing live presumably leaves
+        # `audition` as None, so the subscript below fails with TypeError - consider a clearer error
+        audition = traverse_obj(channel, ('items', lambda _, v: v['is_live'] is True), get_all=False)
          return {
              '_type': 'url_transparent',
-            'id': channel_id,
-            'url': 'tvp:%s' % video_id,
-            'title': audition_title,
-            'alt_title': station_name,
+            # the listed ids are not guaranteed to be strings (they are matched via str() above)
+            'id': channel_id or str(channel['id']),
+            'url': 'tvp:%s' % audition['video_id'],
+            'title': audition.get('title'),
+            'alt_title': channel.get('title'),
              'is_live': True,
              'ie_key': 'TVPEmbed',
          }
author	lauren n. liberda <redacted>
	Sun, 12 Feb 2023 04:43:10 +0000 (05:43 +0100)
committer	GitHub <redacted>
	Sun, 12 Feb 2023 04:43:10 +0000 (10:13 +0530)