jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/tubitv.py
[TubiTv] Add TubiTvShowIE (#243)
[yt-dlp.git] / yt_dlp / extractor / tubitv.py
index ebfb05c636dffefdd105a52290048bf01d5b9ce6..6bc3234c6a30dc5fc5fcdb358003d8f2774afe54 100644 (file)
@@ -7,13 +7,19 @@
 from ..utils import (
     ExtractorError,
     int_or_none,
+    js_to_json,
     sanitized_Request,
     urlencode_postdata,
 )
 
 
 class TubiTvIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/(?P<id>[0-9]+)'
+    _VALID_URL = r'''(?x)
+                    (?:
+                        tubitv:|
+                        https?://(?:www\.)?tubitv\.com/(?:video|movies|tv-shows)/
+                    )
+                    (?P<id>[0-9]+)'''
     _LOGIN_URL = 'http://tubitv.com/login'
     _NETRC_MACHINE = 'tubitv'
     _GEO_COUNTRIES = ['US']
@@ -108,3 +114,28 @@ def _real_extract(self, url):
             'uploader_id': video_data.get('publisher_id'),
             'release_year': int_or_none(video_data.get('year')),
         }
+
+
+# Playlist extractor for tubitv.com series pages: every entry found in the
+# page's embedded data is delegated to TubiTvIE through a 'tubitv:<id>' URL.
+class TubiTvShowIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?tubitv\.com/series/[0-9]+/(?P<show_name>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://tubitv.com/series/3936/the-joy-of-painting-with-bob-ross?start=true',
+        'playlist_mincount': 390,
+        'info_dict': {
+            'id': 'the-joy-of-painting-with-bob-ross',
+        }
+    }]
+
+    def _entries(self, show_url, show_name):
+        """Lazily yield one url_result per key of the page's 'fullContentById' map."""
+        page = self._download_webpage(show_url, show_name)
+        # The page embeds its state as a JavaScript object literal assigned to
+        # window.__data; js_to_json converts it before it is parsed as JSON.
+        raw_data = self._search_regex(
+            r"window\.__data\s*=\s*({.+?});\s*</script>", page, 'data')
+        video_data = self._parse_json(
+            raw_data, show_name, transform_source=js_to_json)['video']
+        # Keys are presumably episode ids (each one is passed on as video_id).
+        for episode_id in video_data['fullContentById']:
+            episode_url = 'tubitv:%s' % episode_id
+            yield self.url_result(
+                episode_url, ie=TubiTvIE.ie_key(), video_id=episode_id)
+
+    def _real_extract(self, url):
+        """Return a playlist whose id is the show name captured from the URL."""
+        mobj = re.match(self._VALID_URL, url)
+        show_name = mobj.group('show_name')
+        entries = self._entries(url, show_name)
+        return self.playlist_result(entries, playlist_id=show_name)