Update to ytdl-2021.01.03

[yt-dlp.git] / youtube_dlc / extractor / viki.py
diff --git a/youtube_dlc/extractor/viki.py b/youtube_dlc/extractor/viki.py

index f8e3603385474fc003f2e763f9a910a214897939..fd1c305b1ee81b3b5b181e564a194d4a72dd8814 100644 (file)
--- a/youtube_dlc/extractor/viki.py
+++ b/youtube_dlc/extractor/viki.py
@@ -1,6 +1,7 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
+import base64
  import hashlib
  import hmac
  import itertools
@@ -9,6 +10,10 @@
  import time
  
  from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
  from ..utils import (
      ExtractorError,
      int_or_none,
@@ -16,6 +21,7 @@
      parse_age_limit,
      parse_iso8601,
      sanitized_Request,
+    std_headers,
  )
  
  
@@ -57,14 +63,14 @@ def _prepare_call(self, path, timestamp=None, post_data=None):
  
      def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
          resp = self._download_json(
-            self._prepare_call(path, timestamp, post_data), video_id, note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
+            self._prepare_call(path, timestamp, post_data), video_id, note)
  
          error = resp.get('error')
          if error:
              if error == 'invalid timestamp':
                  resp = self._download_json(
                      self._prepare_call(path, int(resp['current_timestamp']), post_data),
-                    video_id, '%s (retry)' % note, headers={'x-viki-app-ver': '2.2.5.1428709186'}, expected_status=[200, 400, 404])
+                    video_id, '%s (retry)' % note)
                  error = resp.get('error')
              if error:
                  self._raise_error(resp['error'])
@@ -166,19 +172,20 @@ class VikiIE(VikiBaseIE):
      }, {
          # episode
          'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
-        'md5': '5fa476a902e902783ac7a4d615cdbc7a',
+        'md5': '94e0e34fd58f169f40c184f232356cfe',
          'info_dict': {
              'id': '44699v',
              'ext': 'mp4',
              'title': 'Boys Over Flowers - Episode 1',
              'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
-            'duration': 4204,
+            'duration': 4172,
              'timestamp': 1270496524,
              'upload_date': '20100405',
              'uploader': 'group8',
              'like_count': int,
              'age_limit': 13,
-        }
+        },
+        'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
      }, {
          # youtube external
          'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
@@ -195,14 +202,15 @@ class VikiIE(VikiBaseIE):
              'uploader_id': 'ad14065n',
              'like_count': int,
              'age_limit': 13,
-        }
+        },
+        'skip': 'Page not found!',
      }, {
          'url': 'http://www.viki.com/player/44699v',
          'only_matching': True,
      }, {
          # non-English description
          'url': 'http://www.viki.com/videos/158036v-love-in-magic',
-        'md5': '1713ae35df5a521b31f6dc40730e7c9c',
+        'md5': 'adf9e321a0ae5d0aace349efaaff7691',
          'info_dict': {
              'id': '158036v',
              'ext': 'mp4',
@@ -218,71 +226,13 @@ class VikiIE(VikiBaseIE):
      def _real_extract(self, url):
          video_id = self._match_id(url)
  
-        video = self._call_api(
-            'videos/%s.json' % video_id, video_id, 'Downloading video JSON')
-
-        streams = self._call_api(
-            'videos/%s/streams.json' % video_id, video_id,
-            'Downloading video streams JSON')
-
-        formats = []
-        for format_id, stream_dict in streams.items():
-            height = int_or_none(self._search_regex(
-                r'^(\d+)[pP]$', format_id, 'height', default=None))
-            for protocol, format_dict in stream_dict.items():
-                # rtmps URLs does not seem to work
-                if protocol == 'rtmps':
-                    continue
-                format_url = format_dict.get('url')
-                format_drms = format_dict.get('drms')
-                format_stream_id = format_dict.get('id')
-                if format_id == 'm3u8':
-                    m3u8_formats = self._extract_m3u8_formats(
-                        format_url, video_id, 'mp4',
-                        entry_protocol='m3u8_native',
-                        m3u8_id='m3u8-%s' % protocol, fatal=False)
-                    # Despite CODECS metadata in m3u8 all video-only formats
-                    # are actually video+audio
-                    for f in m3u8_formats:
-                        if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
-                            f['acodec'] = None
-                    formats.extend(m3u8_formats)
-                elif format_id == 'mpd':
-                    mpd_formats = self._extract_mpd_formats(
-                        format_url, video_id,
-                        mpd_id='mpd-%s' % protocol, fatal=False)
-                    formats.extend(mpd_formats)
-                elif format_id == 'mpd':
-
-                    formats.extend(mpd_formats)
-                elif format_url.startswith('rtmp'):
-                    mobj = re.search(
-                        r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
-                        format_url)
-                    if not mobj:
-                        continue
-                    formats.append({
-                        'format_id': 'rtmp-%s' % format_id,
-                        'ext': 'flv',
-                        'url': mobj.group('url'),
-                        'play_path': mobj.group('playpath'),
-                        'app': mobj.group('app'),
-                        'page_url': url,
-                        'drms': format_drms,
-                        'stream_id': format_stream_id,
-                    })
-                else:
-                    urlh = self._request_webpage(
-                        HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
-                    formats.append({
-                        'url': format_url,
-                        'format_id': '%s-%s' % (format_id, protocol),
-                        'height': height,
-                        'drms': format_drms,
-                        'stream_id': format_stream_id,
-                        'filesize': int_or_none(urlh.headers.get('Content-Length')),
-                    })
-        self._sort_formats(formats)
+        resp = self._download_json(
+            'https://www.viki.com/api/videos/' + video_id,
+            video_id, 'Downloading video JSON', headers={
+                'x-client-user-agent': std_headers['User-Agent'],
+                'x-viki-app-ver': '4.0.57',
+            })
+        video = resp['video']
  
          self._check_errors(video)
  
@@ -308,19 +258,26 @@ def _real_extract(self, url):
                  'url': thumbnail.get('url'),
              })
  
-        stream_ids = []
-        for f in formats:
-            s_id = f.get('stream_id')
-            if s_id is not None:
-                stream_ids.append(s_id)
-
          subtitles = {}
-        for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
-            subtitles[subtitle_lang] = [{
-                'ext': subtitles_format,
-                'url': self._prepare_call(
-                    'videos/%s/subtitles/%s.%s?stream_id=%s' % (video_id, subtitle_lang, subtitles_format, stream_ids[0])),
-            } for subtitles_format in ('srt', 'vtt')]
+        try:
+            # New way to fetch subtitles
+            new_video = self._download_json(
+                'https://www.viki.com/api/videos/%s' % video_id, video_id,
+                'Downloading new video JSON to get subtitles', fatal=False)
+            for sub in new_video.get('streamSubtitles').get('dash'):
+                subtitles[sub.get('srclang')] = [{
+                    'ext': 'vtt',
+                    'url': sub.get('src'),
+                    'completion': sub.get('percentage'),
+                }]
+        except AttributeError:
+            # fall back to the old way if the response has no streamSubtitles entry
+            for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
+                subtitles[subtitle_lang] = [{
+                    'ext': subtitles_format,
+                    'url': self._prepare_call(
+                        'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
+                } for subtitles_format in ('srt', 'vtt')]
  
          result = {
              'id': video_id,
@@ -335,12 +292,84 @@ def _real_extract(self, url):
              'subtitles': subtitles,
          }
  
-        if 'external' in streams:
-            result.update({
-                '_type': 'url_transparent',
-                'url': streams['external']['url'],
-            })
-            return result
+        formats = []
+
+        def add_format(format_id, format_dict, protocol='http'):
+            """Collect the format(s) that format_dict describes into formats."""
+            # rtmps URLs do not seem to work
+            if protocol == 'rtmps':
+                return
+            format_url = format_dict.get('url')
+            if not format_url:
+                return
+            format_drms = format_dict.get('drms')
+            format_stream_id = format_dict.get('id')
+            qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
+            stream = qs.get('stream', [None])[0]
+            if stream:
+                # a `stream` query parameter holds the actual URL, base64-encoded
+                format_url = base64.b64decode(stream).decode()
+            if format_id in ('m3u8', 'hls'):
+                m3u8_formats = self._extract_m3u8_formats(
+                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                    m3u8_id='m3u8-%s' % protocol, fatal=False)
+                # Despite CODECS metadata in m3u8, video-only formats are really video+audio
+                for f in m3u8_formats:
+                    if '_drm/index_' in f['url']:
+                        continue
+                    if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
+                        f['acodec'] = None
+                    formats.append(f)
+            elif format_id in ('mpd', 'dash'):
+                formats.extend(self._extract_mpd_formats(
+                    format_url, video_id, 'mpd-%s' % protocol, fatal=False))
+            elif format_url.startswith('rtmp'):
+                mobj = re.search(
+                    r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$', format_url)
+                if not mobj:
+                    return
+                formats.append({
+                    'format_id': 'rtmp-%s' % format_id,
+                    'ext': 'flv',
+                    'url': mobj.group('url'),
+                    'play_path': mobj.group('playpath'),
+                    'app': mobj.group('app'),
+                    'page_url': url,
+                    'drms': format_drms,
+                    'stream_id': format_stream_id,
+                })
+            else:
+                urlh = self._request_webpage(
+                    HEADRequest(format_url), video_id, 'Checking file size', fatal=False)
+                formats.append({
+                    'url': format_url,
+                    'format_id': '%s-%s' % (format_id, protocol),
+                    'height': int_or_none(self._search_regex(
+                        r'^(\d+)[pP]$', format_id, 'height', default=None)),
+                    'drms': format_drms,
+                    'stream_id': format_stream_id,
+                    # the HEAD request is non-fatal, so urlh may be falsy on failure
+                    'filesize': int_or_none(urlh.headers.get('Content-Length')) if urlh else None,
+                })
+
+        for format_id, format_dict in (resp.get('streams') or {}).items():
+            add_format(format_id, format_dict)
+        if not formats:
+            streams = self._call_api(
+                'videos/%s/streams.json' % video_id, video_id,
+                'Downloading video streams JSON')
+
+            if 'external' in streams:
+                result.update({
+                    '_type': 'url_transparent',
+                    'url': streams['external']['url'],
+                })
+                return result
+
+            for format_id, stream_dict in streams.items():
+                for protocol, format_dict in stream_dict.items():
+                    add_format(format_id, format_dict, protocol)
+        self._sort_formats(formats)
  
          result['formats'] = formats
          return result