jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/tubitv.py
[youtube] Fix error reporting of "Incomplete data"
[yt-dlp.git] / yt_dlp / extractor / tubitv.py
index 6bc3234c6a30dc5fc5fcdb358003d8f2774afe54..d91a46500ca17318cb2dc20ae82961e1c20e2f10 100644 (file)
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import re
 
 from .common import InfoExtractor
@@ -10,6 +7,7 @@
     js_to_json,
     sanitized_Request,
     urlencode_postdata,
+    traverse_obj,
 )
 
 
@@ -54,10 +52,7 @@ class TubiTvIE(InfoExtractor):
         },
     }]
 
-    def _login(self):
-        username, password = self._get_login_info()
-        if username is None:
-            return
+    def _perform_login(self, username, password):
         self.report_login()
         form_data = {
             'username': username,
@@ -72,18 +67,20 @@ def _login(self):
             raise ExtractorError(
                 'Login failed (invalid username/password)', expected=True)
 
-    def _real_initialize(self):
-        self._login()
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_data = self._download_json(
-            'http://tubitv.com/oz/videos/%s/content' % video_id, video_id)
+            'https://tubitv.com/oz/videos/%s/content?video_resources=dash&video_resources=hlsv3&video_resources=hlsv6' % video_id, video_id)
         title = video_data['title']
 
-        formats = self._extract_m3u8_formats(
-            self._proto_relative_url(video_data['url']),
-            video_id, 'mp4', 'm3u8_native')
+        formats = []
+
+        for resource in video_data['video_resources']:
+            if resource['type'] in ('dash', ):
+                formats += self._extract_mpd_formats(resource['manifest']['url'], video_id, mpd_id=resource['type'], fatal=False)
+            elif resource['type'] in ('hlsv3', 'hlsv6'):
+                formats += self._extract_m3u8_formats(resource['manifest']['url'], video_id, 'mp4', m3u8_id=resource['type'], fatal=False)
+
         self._sort_formats(formats)
 
         thumbnails = []
@@ -103,6 +100,9 @@ def _real_extract(self, url):
                 'url': self._proto_relative_url(sub_url),
             })
 
+        season_number, episode_number, episode_title = self._search_regex(
+            r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None))
+
         return {
             'id': video_id,
             'title': title,
@@ -113,6 +113,9 @@ def _real_extract(self, url):
             'duration': int_or_none(video_data.get('duration')),
             'uploader_id': video_data.get('publisher_id'),
             'release_year': int_or_none(video_data.get('year')),
+            'season_number': int_or_none(season_number),
+            'episode_number': int_or_none(episode_number),
+            'episode_title': episode_title
         }
 
 
@@ -128,14 +131,18 @@ class TubiTvShowIE(InfoExtractor):
 
     def _entries(self, show_url, show_name):
         show_webpage = self._download_webpage(show_url, show_name)
+
         show_json = self._parse_json(self._search_regex(
-            r"window\.__data\s*=\s*({.+?});\s*</script>",
-            show_webpage, 'data',), show_name, transform_source=js_to_json)['video']
+            r'window\.__data\s*=\s*({[^<]+});\s*</script>',
+            show_webpage, 'data'), show_name, transform_source=js_to_json)['video']
+
         for episode_id in show_json['fullContentById'].keys():
+            if traverse_obj(show_json, ('byId', episode_id, 'type')) == 's':
+                continue
             yield self.url_result(
                 'tubitv:%s' % episode_id,
                 ie=TubiTvIE.ie_key(), video_id=episode_id)
 
     def _real_extract(self, url):
-        show_name = re.match(self._VALID_URL, url).group('show_name')
+        show_name = self._match_valid_url(url).group('show_name')
         return self.playlist_result(self._entries(url, show_name), playlist_id=show_name)