[extractor] Add `_perform_login` function (#2943)
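The login flow now lives on VLiveBaseIE through the new _perform_login hook (replacing
VLiveIE._real_initialize/_login), so every vlive extractor in this file shares it. With
_NETRC_MACHINE set to 'vlive' on the base class, credentials can be supplied via
--username/--password or, with --netrc, from an entry like the illustrative one below
(the address and password are placeholders):

    machine vlive login you@example.com password your-password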
diff --git a/yt_dlp/extractor/vlive.py b/yt_dlp/extractor/vlive.py
index 84f51a544c11aa2cffdb0b683bf48dd284be407d..ae35c976c22a6d8185bb1ab20799dc6c7e38e4ba 100644
--- a/yt_dlp/extractor/vlive.py
+++ b/yt_dlp/extractor/vlive.py
 from ..utils import (
     ExtractorError,
     int_or_none,
+    LazyList,
     merge_dicts,
     str_or_none,
     strip_or_none,
     try_get,
     urlencode_postdata,
+    url_or_none,
 )
 
 
 class VLiveBaseIE(NaverBaseIE):
-    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
+    _NETRC_MACHINE = 'vlive'
+    _logged_in = False
+
+    def _perform_login(self, username, password):
+        if self._logged_in:
+            return
+        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
+        self._request_webpage(
+            LOGIN_URL, None, note='Downloading login cookies')
+
+        self._download_webpage(
+            LOGIN_URL, None, note='Logging in',
+            data=urlencode_postdata({'email': username, 'pwd': password}),
+            headers={
+                'Referer': LOGIN_URL,
+                'Content-Type': 'application/x-www-form-urlencoded'
+            })
+
+        login_info = self._download_json(
+            'https://www.vlive.tv/auth/loginInfo', None,
+            note='Checking login status',
+            headers={'Referer': 'https://www.vlive.tv/home'})
+
+        if not try_get(login_info, lambda x: x['message']['login'], bool):
+            raise ExtractorError('Unable to log in', expected=True)
+        VLiveBaseIE._logged_in = True
+
+    def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
+        if note is None:
+            note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
+        query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
+        if fields:
+            query['fields'] = fields
+        if query_add:
+            query.update(query_add)
+        try:
+            return self._download_json(
+                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
+                note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
+            raise
 
 
 class VLiveIE(VLiveBaseIE):
     IE_NAME = 'vlive'
     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
-    _NETRC_MACHINE = 'vlive'
     _TESTS = [{
         'url': 'http://www.vlive.tv/video/1326',
         'md5': 'cc7314812855ce56de70a06a27314983',
@@ -38,6 +81,12 @@ class VLiveIE(VLiveBaseIE):
             'creator': "Girl's Day",
             'view_count': int,
             'uploader_id': 'muploader_a',
+            'upload_date': '20150817',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+            'timestamp': 1439816449,
+        },
+        'params': {
+            'skip_download': True,
         },
     }, {
         'url': 'http://www.vlive.tv/video/16937',
@@ -49,6 +98,9 @@ class VLiveIE(VLiveBaseIE):
             'view_count': int,
             'subtitles': 'mincount:12',
             'uploader_id': 'muploader_j',
+            'upload_date': '20161112',
+            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+            'timestamp': 1478923074,
         },
         'params': {
             'skip_download': True,
@@ -81,53 +133,6 @@ class VLiveIE(VLiveBaseIE):
         'playlist_mincount': 120
     }]
 
-    def _real_initialize(self):
-        self._login()
-
-    def _login(self):
-        email, password = self._get_login_info()
-        if None in (email, password):
-            return
-
-        def is_logged_in():
-            login_info = self._download_json(
-                'https://www.vlive.tv/auth/loginInfo', None,
-                note='Downloading login info',
-                headers={'Referer': 'https://www.vlive.tv/home'})
-            return try_get(
-                login_info, lambda x: x['message']['login'], bool) or False
-
-        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
-        self._request_webpage(
-            LOGIN_URL, None, note='Downloading login cookies')
-
-        self._download_webpage(
-            LOGIN_URL, None, note='Logging in',
-            data=urlencode_postdata({'email': email, 'pwd': password}),
-            headers={
-                'Referer': LOGIN_URL,
-                'Content-Type': 'application/x-www-form-urlencoded'
-            })
-
-        if not is_logged_in():
-            raise ExtractorError('Unable to log in', expected=True)
-
-    def _call_api(self, path_template, video_id, fields=None, limit=None):
-        query = {'appId': self._APP_ID, 'gcc': 'KR', 'platformType': 'PC'}
-        if fields:
-            query['fields'] = fields
-        if limit:
-            query['limit'] = limit
-        try:
-            return self._download_json(
-                'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
-                'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
-                headers={'Referer': 'https://www.vlive.tv/'}, query=query)
-        except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
-                self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
-            raise
-
     def _real_extract(self, url):
         video_id = self._match_id(url)
 
@@ -135,30 +140,24 @@ def _real_extract(self, url):
             'post/v1.0/officialVideoPost-%s', video_id,
             'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId},playlist{playlistSeq,totalCount,name}')
 
-        playlist = post.get('playlist')
-        if not playlist or self.get_param('noplaylist'):
-            if playlist:
-                self.to_screen(
-                    'Downloading just video %s because of --no-playlist'
-                    % video_id)
-
+        playlist_id = str_or_none(try_get(post, lambda x: x['playlist']['playlistSeq']))
+        if not self._yes_playlist(playlist_id, video_id):
             video = post['officialVideo']
             return self._get_vlive_info(post, video, video_id)
-        else:
-            playlist_name = playlist.get('name')
-            playlist_id = str_or_none(playlist.get('playlistSeq'))
-            playlist_count = str_or_none(playlist.get('totalCount'))
 
-            playlist = self._call_api(
-                'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', limit=playlist_count)
+        playlist_name = str_or_none(try_get(post, lambda x: x['playlist']['name']))
+        playlist_count = str_or_none(try_get(post, lambda x: x['playlist']['totalCount']))
 
-            entries = []
-            for video_data in playlist['data']:
-                video = video_data.get('officialVideo')
-                video_id = str_or_none(video.get('videoSeq'))
-                entries.append(self._get_vlive_info(video_data, video, video_id))
+        playlist = self._call_api(
+            'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})
 
-            return self.playlist_result(entries, playlist_id, playlist_name)
+        entries = []
+        for video_data in playlist['data']:
+            video = video_data.get('officialVideo')
+            video_id = str_or_none(video.get('videoSeq'))
+            entries.append(self._get_vlive_info(video_data, video, video_id))
+
+        return self.playlist_result(entries, playlist_id, playlist_name)
 
     def _get_vlive_info(self, post, video, video_id):
         def get_common_fields():
@@ -172,6 +171,8 @@ def get_common_fields():
                 'view_count': int_or_none(video.get('playCount')),
                 'like_count': int_or_none(video.get('likeCount')),
                 'comment_count': int_or_none(video.get('commentCount')),
+                'timestamp': int_or_none(video.get('createdAt'), scale=1000),
+                'thumbnail': video.get('thumb'),
             }
 
         video_type = video.get('type')
@@ -197,7 +198,7 @@ def get_common_fields():
                 self._sort_formats(formats)
                 info = get_common_fields()
                 info.update({
-                    'title': self._live_title(video['title']),
+                    'title': video['title'],
                     'id': video_id,
                     'formats': formats,
                     'is_live': True,
@@ -216,7 +217,7 @@ def get_common_fields():
                 raise ExtractorError('Unknown status ' + status)
 
 
-class VLivePostIE(VLiveIE):
+class VLivePostIE(VLiveBaseIE):
     IE_NAME = 'vlive:post'
     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
     _TESTS = [{
@@ -238,8 +239,6 @@ class VLivePostIE(VLiveIE):
         'playlist_count': 1,
     }]
     _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
-    _SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
-    _INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
 
     def _real_extract(self, url):
         post_id = self._match_id(url)
@@ -266,7 +265,7 @@ def _real_extract(self, url):
             entry = None
             if upload_type == 'SOS':
                 download = self._call_api(
-                    self._SOS_TMPL, video_id)['videoUrl']['download']
+                    self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
                 formats = []
                 for f_id, f_url in download.items():
                     formats.append({
@@ -284,7 +283,7 @@ def _real_extract(self, url):
                 vod_id = upload_info.get('videoId')
                 if not vod_id:
                     continue
-                inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
+                inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
                 entry = self._extract_video_info(video_id, vod_id, inkey)
             if entry:
                 entry['title'] = '%s_part%s' % (title, idx)
@@ -295,7 +294,7 @@ def _real_extract(self, url):
 
 class VLiveChannelIE(VLiveBaseIE):
     IE_NAME = 'vlive:channel'
-    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
+    _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
     _TESTS = [{
         'url': 'http://channels.vlive.tv/FCD4B',
         'info_dict': {
@@ -306,78 +305,57 @@ class VLiveChannelIE(VLiveBaseIE):
     }, {
         'url': 'https://www.vlive.tv/channel/FCD4B',
         'only_matching': True,
+    }, {
+        'url': 'https://www.vlive.tv/channel/FCD4B/board/3546',
+        'info_dict': {
+            'id': 'FCD4B-3546',
+            'title': 'MAMAMOO - Star Board',
+        },
+        'playlist_mincount': 880
     }]
 
-    def _call_api(self, path, channel_key_suffix, channel_value, note, query):
-        q = {
-            'app_id': self._APP_ID,
-            'channel' + channel_key_suffix: channel_value,
-        }
-        q.update(query)
-        return self._download_json(
-            'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
-            channel_value, note='Downloading ' + note, query=q)['result']
-
-    def _real_extract(self, url):
-        channel_code = self._match_id(url)
-
-        channel_seq = self._call_api(
-            'decodeChannelCode', 'Code', channel_code,
-            'decode channel code', {})['channelSeq']
-
-        channel_name = None
-        entries = []
+    def _entries(self, posts_id, board_name):
+        if board_name:
+            posts_path = 'post/v1.0/board-%s/posts'
+            query_add = {'limit': 100, 'sortType': 'LATEST'}
+        else:
+            posts_path = 'post/v1.0/channel-%s/starPosts'
+            query_add = {'limit': 100}
 
         for page_num in itertools.count(1):
             video_list = self._call_api(
-                'getChannelVideoList', 'Seq', channel_seq,
-                'channel list page #%d' % page_num, {
-                    # Large values of maxNumOfRows (~300 or above) may cause
-                    # empty responses (see [1]), e.g. this happens for [2] that
-                    # has more than 300 videos.
-                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
-                    # 2. http://channels.vlive.tv/EDBF.
-                    'maxNumOfRows': 100,
-                    'pageNo': page_num
-                }
-            )
-
-            if not channel_name:
-                channel_name = try_get(
-                    video_list,
-                    lambda x: x['channelInfo']['channelName'],
-                    compat_str)
+                posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add,
+                note=f'Downloading playlist page {page_num}')
+
+            for video in try_get(video_list, lambda x: x['data'], list) or []:
+                video_id = str_or_none(video.get('postId'))
+                video_title = str_or_none(video.get('title'))
+                video_url = url_or_none(video.get('url'))
+                if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO':
+                    continue
+                channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str)
+                yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name)
 
-            videos = try_get(
-                video_list, lambda x: x['videoList'], list)
-            if not videos:
+            after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str)
+            if not after:
                 break
+            query_add['after'] = after
+
+    def _real_extract(self, url):
+        channel_id, posts_id = self._match_valid_url(url).groups()
 
-            for video in videos:
-                video_id = video.get('videoSeq')
-                video_type = video.get('videoType')
+        board_name = None
+        if posts_id:
+            board = self._call_api(
+                'board/v1.0/board-%s', posts_id, 'title,boardType')
+            board_name = board.get('title') or 'Unknown'
+            if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'):
+                raise ExtractorError(f'Board {board_name!r} is not supported', expected=True)
 
-                if not video_id or not video_type:
-                    continue
-                video_id = compat_str(video_id)
-
-                if video_type in ('PLAYLIST'):
-                    first_video_id = try_get(
-                        video,
-                        lambda x: x['videoPlaylist']['videoList'][0]['videoSeq'], int)
-
-                    if not first_video_id:
-                        continue
-
-                    entries.append(
-                        self.url_result(
-                            'http://www.vlive.tv/video/%s' % first_video_id,
-                            ie=VLiveIE.ie_key(), video_id=first_video_id))
-                else:
-                    entries.append(
-                        self.url_result(
-                            'http://www.vlive.tv/video/%s' % video_id,
-                            ie=VLiveIE.ie_key(), video_id=video_id))
+        entries = LazyList(self._entries(posts_id or channel_id, board_name))
+        channel_name = entries[0]['channel'] if entries else None
 
         return self.playlist_result(
-            entries, channel_code, channel_name)
+            entries,
+            f'{channel_id}-{posts_id}' if posts_id else channel_id,
+            f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)
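
A quick way to exercise the reworked VLiveChannelIE from Python (a sketch, not part of the
patch; the board URL and expected values come from the new test case above, and the
credential values are placeholders that simply route through the new _perform_login):

    import yt_dlp

    opts = {
        'extract_flat': True,  # list the board entries without resolving each post
        # Placeholders; supplying them makes yt-dlp call VLiveBaseIE._perform_login first.
        'username': 'you@example.com',
        'password': 'your-password',
    }

    with yt_dlp.YoutubeDL(opts) as ydl:
        info = ydl.extract_info(
            'https://www.vlive.tv/channel/FCD4B/board/3546', download=False)
        # Expected per the test case: id 'FCD4B-3546', title 'MAMAMOO - Star Board'
        print(info['id'], info['title'])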