jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/youku.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / youku.py
index ab59200d79931f2240b785687701b80b352cacd1..fa6b0539bbac90960b32cca3fbe35babbb80f652 100644 (file)
@@ -6,6 +6,7 @@
 from .common import InfoExtractor
 from ..utils import (
     ExtractorError,
+    clean_html,
     get_element_by_class,
     js_to_json,
     str_or_none,
@@ -19,55 +20,15 @@ class YoukuIE(InfoExtractor):
     _VALID_URL = r'''(?x)
         (?:
             https?://(
-                (?:v|player)\.youku\.com/(?:v_show/id_|player\.php/sid/)|
+                (?:v|play(?:er)?)\.(?:youku|tudou)\.com/(?:v_show/id_|player\.php/sid/)|
                 video\.tudou\.com/v/)|
             youku:)
         (?P<id>[A-Za-z0-9]+)(?:\.html|/v\.swf|)
     '''
 
     _TESTS = [{
-        # MD5 is unstable
-        'url': 'http://v.youku.com/v_show/id_XMTc1ODE5Njcy.html',
-        'info_dict': {
-            'id': 'XMTc1ODE5Njcy',
-            'title': '★Smile﹗♡ Git Fresh -Booty Music舞蹈.',
-            'ext': 'mp4',
-            'duration': 74.73,
-            'thumbnail': r're:^https?://.*',
-            'uploader': '。躲猫猫、',
-            'uploader_id': '36017967',
-            'uploader_url': 'http://i.youku.com/u/UMTQ0MDcxODY4',
-            'tags': list,
-        }
-    }, {
         'url': 'http://player.youku.com/player.php/sid/XNDgyMDQ2NTQw/v.swf',
         'only_matching': True,
-    }, {
-        'url': 'http://v.youku.com/v_show/id_XODgxNjg1Mzk2_ev_1.html',
-        'info_dict': {
-            'id': 'XODgxNjg1Mzk2',
-            'ext': 'mp4',
-            'title': '武媚娘传奇 85',
-            'duration': 1999.61,
-            'thumbnail': r're:^https?://.*',
-            'uploader': '疯狂豆花',
-            'uploader_id': '62583473',
-            'uploader_url': 'http://i.youku.com/u/UMjUwMzMzODky',
-            'tags': list,
-        },
-    }, {
-        'url': 'http://v.youku.com/v_show/id_XMTI1OTczNDM5Mg==.html',
-        'info_dict': {
-            'id': 'XMTI1OTczNDM5Mg',
-            'ext': 'mp4',
-            'title': '花千骨 04',
-            'duration': 2363,
-            'thumbnail': r're:^https?://.*',
-            'uploader': '放剧场-花千骨',
-            'uploader_id': '772849359',
-            'uploader_url': 'http://i.youku.com/u/UMzA5MTM5NzQzNg==',
-            'tags': list,
-        },
     }, {
         'url': 'http://v.youku.com/v_show/id_XNjA1NzA2Njgw.html',
         'note': 'Video protected with password',
@@ -85,6 +46,7 @@ class YoukuIE(InfoExtractor):
         'params': {
             'videopassword': '100600',
         },
+        'skip': '404',
     }, {
         # /play/get.json contains streams with "channel_type":"tail"
         'url': 'http://v.youku.com/v_show/id_XOTUxMzg4NDMy.html',
@@ -125,12 +87,25 @@ class YoukuIE(InfoExtractor):
             'uploader_url': 'https://www.youku.com/profile/index/?uid=UNjU2MzY1MzM1Ng==',
             'tags': list,
         },
+    }, {
+        'url': 'https://play.tudou.com/v_show/id_XNjAxNjI2OTU3Ng==.html?',
+        'info_dict': {
+            'id': 'XNjAxNjI2OTU3Ng',
+            'ext': 'mp4',
+            'title': '阿斯塔意识到哈里杀了人,自己被骗了',
+            'thumbnail': 'https://m.ykimg.com/0541010164F732752794D4D7B70331D1',
+            'uploader_id': '88758207',
+            'tags': [],
+            'uploader_url': 'https://www.youku.com/profile/index/?uid=UMzU1MDMyODI4',
+            'uploader': '英美剧场',
+            'duration': 72.91,
+        },
     }]
 
     @staticmethod
     def get_ysuid():
-        return '%d%s' % (int(time.time()), ''.join([
-            random.choice(string.ascii_letters) for i in range(3)]))
+        return '{}{}'.format(int(time.time()), ''.join(
+            random.choices(string.ascii_letters, k=3)))
 
     def get_format_name(self, fm):
         _dict = {
@@ -192,7 +167,7 @@ def _real_extract(self, url):
             else:
                 msg = 'Youku server reported error %i' % error.get('code')
                 if error_note is not None:
-                    msg += ': ' + error_note
+                    msg += ': ' + clean_html(error_note)
                 raise ExtractorError(msg)
 
         # get video title
@@ -298,7 +273,7 @@ def _real_extract(self, url):
                 continue
             _, new_entries = self._extract_entries(
                 'http://list.youku.com/show/episode', show_id,
-                note='Downloading playlist data page %d' % (idx + 1),
+                note=f'Downloading playlist data page {idx + 1}',
                 query={
                     'id': page_config['showid'],
                     'stage': reload_id,