Update to ytdl-2021.01.03

[yt-dlp.git] / youtube_dlc / extractor / vlive.py
diff --git a/youtube_dlc/extractor/vlive.py b/youtube_dlc/extractor/vlive.py

index 577e33f134baad6592857604ba8a83dd0fc48e92..96b4f665ed50519dadd0b97d6ccdf6801f77fea0 100644 (file)
--- a/youtube_dlc/extractor/vlive.py
+++ b/youtube_dlc/extractor/vlive.py
@@ -1,12 +1,9 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-import re
-import time
  import itertools
  import json
  
-from .common import InfoExtractor
  from .naver import NaverBaseIE
  from ..compat import (
      compat_HTTPError,
@@ -16,6 +13,8 @@
      ExtractorError,
      int_or_none,
      merge_dicts,
+    str_or_none,
+    strip_or_none,
      try_get,
      urlencode_postdata,
  )
@@ -69,6 +68,10 @@ class VLiveIE(VLiveBaseIE):
      }, {
          'url': 'https://www.vlive.tv/embed/1326',
          'only_matching': True,
+    }, {
+        # works only with gcc=KR
+        'url': 'https://www.vlive.tv/video/225019',
+        'only_matching': True,
      }]
  
      def _real_initialize(self):
@@ -103,26 +106,26 @@ def is_logged_in():
              raise ExtractorError('Unable to log in', expected=True)
  
      def _call_api(self, path_template, video_id, fields=None):
-        query = {'appId': self._APP_ID}
+        query = {'appId': self._APP_ID, 'gcc': 'KR'}
          if fields:
              query['fields'] = fields
-        return self._download_json(
-            'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
-            'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
-            headers={'Referer': 'https://www.vlive.tv/'}, query=query)
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
          try:
-            post = self._call_api(
-                'post/v1.0/officialVideoPost-%s', video_id,
-                'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
+            return self._download_json(
+                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+                'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
+                headers={'Referer': 'https://www.vlive.tv/'}, query=query)
          except ExtractorError as e:
              if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                  self.raise_login_required(json.loads(e.cause.read().decode())['message'])
              raise
  
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+
+        post = self._call_api(
+            'post/v1.0/officialVideoPost-%s', video_id,
+            'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
+
          video = post['officialVideo']
  
          def get_common_fields():
@@ -152,6 +155,7 @@ def get_common_fields():
                      'old/v3/live/%s/playInfo',
                      video_id)['result']['adaptiveStreamUrl']
                  formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
+                self._sort_formats(formats)
                  info = get_common_fields()
                  info.update({
                      'title': self._live_title(video['title']),
@@ -173,6 +177,83 @@ def get_common_fields():
                  raise ExtractorError('Unknown status ' + status)
  
  
+class VLivePostIE(VLiveIE):
+    IE_NAME = 'vlive:post'
+    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
+    _TESTS = [{
+        # uploadType = SOS
+        'url': 'https://www.vlive.tv/post/1-20088044',
+        'info_dict': {
+            'id': '1-20088044',
+            'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
+            'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
+        },
+        'playlist_count': 3,
+    }, {
+        # uploadType = V
+        'url': 'https://www.vlive.tv/post/1-20087926',
+        'info_dict': {
+            'id': '1-20087926',
+            'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
+        },
+        'playlist_count': 1,
+    }]
+    _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
+    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
+    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
+
+    def _real_extract(self, url):
+        post_id = self._match_id(url)
+
+        post = self._call_api(
+            'post/v1.0/post-%s', post_id,
+            'attachments{video},officialVideo{videoSeq},plainBody,title')
+
+        video_seq = str_or_none(try_get(
+            post, lambda x: x['officialVideo']['videoSeq']))
+        if video_seq:
+            return self.url_result(
+                'http://www.vlive.tv/video/' + video_seq,
+                VLiveIE.ie_key(), video_seq)
+
+        title = post['title']
+        entries = []
+        for idx, video in enumerate(post['attachments']['video'].values()):
+            video_id = video.get('videoId')
+            if not video_id:
+                continue
+            upload_type = video.get('uploadType')
+            upload_info = video.get('uploadInfo') or {}
+            entry = None
+            if upload_type == 'SOS':
+                download = self._call_api(
+                    self._SOS_TMPL, video_id)['videoUrl']['download']
+                formats = []
+                for f_id, f_url in download.items():
+                    formats.append({
+                        'format_id': f_id,
+                        'url': f_url,
+                        'height': int_or_none(f_id[:-1]),
+                    })
+                self._sort_formats(formats)
+                entry = {
+                    'formats': formats,
+                    'id': video_id,
+                    'thumbnail': upload_info.get('imageUrl'),
+                }
+            elif upload_type == 'V':
+                vod_id = upload_info.get('videoId')
+                if not vod_id:
+                    continue
+                inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+                entry = self._extract_video_info(video_id, vod_id, inkey)
+            if entry:
+                entry['title'] = '%s_part%s' % (title, idx)
+                entries.append(entry)
+        return self.playlist_result(
+            entries, post_id, title, strip_or_none(post.get('plainBody')))
+
+
  class VLiveChannelIE(VLiveBaseIE):
      IE_NAME = 'vlive:channel'
      _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
@@ -235,91 +316,34 @@ def _real_extract(self, url):
  
              for video in videos:
                  video_id = video.get('videoSeq')
-                if not video_id:
+                video_type = video.get('videoType')
+
+                if not video_id or not video_type:
                      continue
                  video_id = compat_str(video_id)
-                entries.append(
-                    self.url_result(
-                        'http://www.vlive.tv/video/%s' % video_id,
-                        ie=VLiveIE.ie_key(), video_id=video_id))
+
+                if video_type == 'PLAYLIST':  # exact match, not a substring test
+                    playlist_videos = try_get(
+                        video,
+                        lambda x: x['videoPlaylist']['videoList'], list)
+                    if not playlist_videos:
+                        continue
+
+                    for playlist_video in playlist_videos:
+                        playlist_video_id = playlist_video.get('videoSeq')
+                        if not playlist_video_id:
+                            continue
+                        playlist_video_id = compat_str(playlist_video_id)
+
+                        entries.append(
+                            self.url_result(
+                                'http://www.vlive.tv/video/%s' % playlist_video_id,
+                                ie=VLiveIE.ie_key(), video_id=playlist_video_id))
+                else:
+                    entries.append(
+                        self.url_result(
+                            'http://www.vlive.tv/video/%s' % video_id,
+                            ie=VLiveIE.ie_key(), video_id=video_id))
  
          return self.playlist_result(
              entries, channel_code, channel_name)
-
-
-# old extractor. Rewrite?
-
-class VLivePlaylistIE(VLiveBaseIE):
-    IE_NAME = 'vlive:playlist'
-    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
-    _VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
-    _TESTS = [{
-        # regular working playlist
-        'url': 'https://www.vlive.tv/video/117956/playlist/117963',
-        'info_dict': {
-            'id': '117963',
-            'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
-        },
-        'playlist_mincount': 10
-    }, {
-        # playlist with no playlistVideoSeqs
-        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
-        'info_dict': {
-            'id': '22867',
-            'ext': 'mp4',
-            'title': '[V LIVE] Valentine Day Message from MINA',
-            'creator': 'TWICE',
-            'view_count': int
-        },
-        'params': {
-            'skip_download': True,
-        }
-    }]
-
-    def _build_video_result(self, video_id, message):
-        self.to_screen(message)
-        return self.url_result(
-            self._VIDEO_URL_TEMPLATE % video_id,
-            ie=VLiveIE.ie_key(), video_id=video_id)
-
-    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id, playlist_id = mobj.group('video_id', 'id')
-
-        if self._downloader.params.get('noplaylist'):
-            return self._build_video_result(
-                video_id,
-                'Downloading just video %s because of --no-playlist'
-                % video_id)
-
-        self.to_screen(
-            'Downloading playlist %s - add --no-playlist to just download video'
-            % playlist_id)
-
-        webpage = self._download_webpage(
-            'http://www.vlive.tv/video/%s/playlist/%s'
-            % (video_id, playlist_id), playlist_id)
-
-        raw_item_ids = self._search_regex(
-            r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
-            'playlist video seqs', default=None, fatal=False)
-
-        if not raw_item_ids:
-            return self._build_video_result(
-                video_id,
-                'Downloading just video %s because no playlist was found'
-                % video_id)
-
-        item_ids = self._parse_json(raw_item_ids, playlist_id)
-
-        entries = [
-            self.url_result(
-                self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
-                video_id=compat_str(item_id))
-            for item_id in item_ids]
-
-        playlist_name = self._html_search_regex(
-            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
-            webpage, 'playlist title', fatal=False)
-
-        return self.playlist_result(entries, playlist_id, playlist_name)