import json
import re
+import urllib.error
import urllib.parse
from .common import InfoExtractor
+from .naver import NaverBaseIE
from .youtube import YoutubeBaseInfoExtractor, YoutubeIE
from ..compat import compat_HTTPError, compat_urllib_parse_unquote
from ..utils import (
if not info.get('title'):
info['title'] = video_id
return info
+
+
+class VLiveWebArchiveIE(InfoExtractor):
+ IE_NAME = 'web.archive:vlive'
+ IE_DESC = 'web.archive.org saved vlive videos'
+ _VALID_URL = r'''(?x)
+ (?:https?://)?web\.archive\.org/
+        (?:web/)?(?:(?P<date>[0-9]{14})?[0-9A-Za-z_*]*/)?  # the /web prefix and the version index are optional
+ (?:https?(?::|%3[Aa])//)?(?:
+ (?:(?:www|m)\.)?vlive\.tv(?::(?:80|443))?/(?:video|embed)/(?P<id>[0-9]+) # VLive URL
+ )
+ '''
+ _TESTS = [{
+ 'url': 'https://web.archive.org/web/20221221144331/http://www.vlive.tv/video/1326',
+ 'md5': 'cc7314812855ce56de70a06a27314983',
+ 'info_dict': {
+ 'id': '1326',
+ 'ext': 'mp4',
+ 'title': "Girl's Day's Broadcast",
+ 'creator': "Girl's Day",
+ 'view_count': int,
+ 'uploader_id': 'muploader_a',
+ 'uploader_url': None,
+ 'uploader': None,
+ 'upload_date': '20150817',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1439816449,
+ 'like_count': int,
+ 'channel': 'Girl\'s Day',
+ 'channel_id': 'FDF27',
+ 'comment_count': int,
+ 'release_timestamp': 1439818140,
+ 'release_date': '20150817',
+ 'duration': 1014,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://web.archive.org/web/20221221182103/http://www.vlive.tv/video/16937',
+ 'info_dict': {
+ 'id': '16937',
+ 'ext': 'mp4',
+ 'title': '첸백시 걍방',
+ 'creator': 'EXO',
+ 'view_count': int,
+ 'subtitles': 'mincount:12',
+ 'uploader_id': 'muploader_j',
+ 'uploader_url': 'http://vlive.tv',
+ 'uploader': None,
+ 'upload_date': '20161112',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1478923074,
+ 'like_count': int,
+ 'channel': 'EXO',
+ 'channel_id': 'F94BD',
+ 'comment_count': int,
+ 'release_timestamp': 1478924280,
+ 'release_date': '20161112',
+ 'duration': 906,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870',
+ 'info_dict': {
+ 'id': '101870',
+ 'ext': 'mp4',
+ 'title': '[ⓓ xV] “레벨이들 매력에 반해? 안 반해?” 움직이는 HD 포토 (레드벨벳:Red Velvet)',
+ 'creator': 'Dispatch',
+ 'view_count': int,
+ 'subtitles': 'mincount:6',
+ 'uploader_id': 'V__FRA08071',
+ 'uploader_url': 'http://vlive.tv',
+ 'uploader': None,
+ 'upload_date': '20181130',
+ 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
+ 'timestamp': 1543601327,
+ 'like_count': int,
+ 'channel': 'Dispatch',
+ 'channel_id': 'C796F3',
+ 'comment_count': int,
+ 'release_timestamp': 1543601040,
+ 'release_date': '20181130',
+ 'duration': 279,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ }]
+
+ # The wayback machine has special timestamp and "mode" values:
+ # timestamp:
+ # 1 = the first capture
+ # 2 = the last capture
+ # mode:
+ # id_ = Identity - perform no alterations of the original resource, return it as it was archived.
+ _WAYBACK_BASE_URL = 'https://web.archive.org/web/2id_/'
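+    # e.g. _WAYBACK_BASE_URL + 'https://www.vlive.tv/video/1326' returns the most
+    # recent capture of that page exactly as archived, with no wayback rewriting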
+
+ def _download_archived_page(self, url, video_id, *, timestamp='2', **kwargs):
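+        # web.archive.org can be flaky, so transient failures are retried; a hard 404
+        # means the page was never captured, which is reported as a clean error below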
+ for retry in self.RetryManager():
+ try:
+ return self._download_webpage(f'https://web.archive.org/web/{timestamp}id_/{url}', video_id, **kwargs)
+ except ExtractorError as e:
+ if isinstance(e.cause, urllib.error.HTTPError) and e.cause.code == 404:
+ raise ExtractorError('Page was not archived', expected=True)
+ retry.error = e
+ continue
+
+ def _download_archived_json(self, url, video_id, **kwargs):
+ page = self._download_archived_page(url, video_id, **kwargs)
+ if not page:
+ raise ExtractorError('Page was not archived', expected=True)
+        return self._parse_json(page, video_id)
+
+ def _extract_formats_from_m3u8(self, m3u8_url, params, video_id):
+ m3u8_doc = self._download_archived_page(m3u8_url, video_id, note='Downloading m3u8', query=params, fatal=False)
+ if not m3u8_doc:
+ return
+
+        # Rewrite the m3u8 document so segment URIs point at the archive domain
+ m3u8_doc = m3u8_doc.splitlines()
+ url_base = m3u8_url.rsplit('/', 1)[0]
+ first_segment = None
+ for i, line in enumerate(m3u8_doc):
+ if not line.startswith('#'):
+ m3u8_doc[i] = f'{self._WAYBACK_BASE_URL}{url_base}/{line}?{urllib.parse.urlencode(params)}'
+ first_segment = first_segment or m3u8_doc[i]
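+        # e.g. a relative segment URI such as 'segment0001.ts' (illustrative name) is
+        # rewritten to 'https://web.archive.org/web/2id_/<url_base>/segment0001.ts?<params>'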
+
+ # Segments may not have been archived. See https://web.archive.org/web/20221127190050/http://www.vlive.tv/video/101870
+        urlh = self._request_webpage(HEADRequest(first_segment), video_id, errnote=False,
+                                     fatal=False, note='Check first segment availability')
+ if urlh:
+ formats, subtitles = self._parse_m3u8_formats_and_subtitles('\n'.join(m3u8_doc), ext='mp4', video_id=video_id)
+ if subtitles:
+ self._report_ignoring_subs('m3u8')
+ return formats
+
+ # Closely follows the logic of the ArchiveTeam grab script
+ # See: https://github.com/ArchiveTeam/vlive-grab/blob/master/vlive.lua
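+    # Rough flow: archived video page -> appId from main.js -> inkey from the vlive
+    # API -> VOD metadata from apis.naver.com -> formats from archived m3u8/mp4 files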
+ def _real_extract(self, url):
+ video_id, url_date = self._match_valid_url(url).group('id', 'date')
+
+        webpage = self._download_archived_page(f'https://www.vlive.tv/video/{video_id}', video_id, timestamp=url_date or '2')
+
+ player_info = self._search_json(r'__PRELOADED_STATE__\s*=', webpage, 'player info', video_id)
+ user_country = traverse_obj(player_info, ('common', 'userCountry'))
+
+ main_script_url = self._search_regex(r'<script\s+src="([^"]+/js/main\.[^"]+\.js)"', webpage, 'main script url')
+ main_script = self._download_archived_page(main_script_url, video_id, note='Downloading main script')
+ app_id = self._search_regex(r'appId\s*=\s*"([^"]+)"', main_script, 'app id')
+
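+        # the "inkey" is a per-video token required by the Naver playback API used
+        # below; the original vlive extractor fetched it the same way, just not
+        # through the archive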
+ inkey = self._download_archived_json(
+ f'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/{video_id}/inkey', video_id, note='Fetching inkey', query={
+ 'appId': app_id,
+ 'platformType': 'PC',
+ 'gcc': user_country,
+ 'locale': 'en_US',
+ }, fatal=False)
+
+ vod_id = traverse_obj(player_info, ('postDetail', 'post', 'officialVideo', 'vodId'))
+
+ vod_data = self._download_archived_json(
+ f'https://apis.naver.com/rmcnmv/rmcnmv/vod/play/v2.0/{vod_id}', video_id, note='Fetching vod data', query={
+ 'key': inkey.get('inkey'),
+                'pid': 'rmcPlayer_16692457559726800',  # partly a unix timestamp, partly random; a fixed value reused from the ArchiveTeam project
+ 'sid': '2024',
+ 'ver': '2.0',
+ 'devt': 'html5_pc',
+ 'doct': 'json',
+ 'ptc': 'https',
+ 'sptc': 'https',
+ 'cpt': 'vtt',
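+                # 'ctls' and 'adi' are pre-percent-encoded JSON blobs (player UI flags
+                # and ad metadata), presumably kept verbatim so the query string matches
+                # the requests the ArchiveTeam grab script originally archived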
+ 'ctls': '%7B%22visible%22%3A%7B%22fullscreen%22%3Atrue%2C%22logo%22%3Afalse%2C%22playbackRate%22%3Afalse%2C%22scrap%22%3Afalse%2C%22playCount%22%3Atrue%2C%22commentCount%22%3Atrue%2C%22title%22%3Atrue%2C%22writer%22%3Atrue%2C%22expand%22%3Afalse%2C%22subtitles%22%3Atrue%2C%22thumbnails%22%3Atrue%2C%22quality%22%3Atrue%2C%22setting%22%3Atrue%2C%22script%22%3Afalse%2C%22logoDimmed%22%3Atrue%2C%22badge%22%3Atrue%2C%22seekingTime%22%3Atrue%2C%22muted%22%3Atrue%2C%22muteButton%22%3Afalse%2C%22viewerNotice%22%3Afalse%2C%22linkCount%22%3Afalse%2C%22createTime%22%3Afalse%2C%22thumbnail%22%3Atrue%7D%2C%22clicked%22%3A%7B%22expand%22%3Afalse%2C%22subtitles%22%3Afalse%7D%7D',
+ 'pv': '4.26.9',
+ 'dr': '1920x1080',
+ 'cpl': 'en_US',
+ 'lc': 'en_US',
+ 'adi': '%5B%7B%22type%22%3A%22pre%22%2C%22exposure%22%3Afalse%2C%22replayExposure%22%3Afalse%7D%5D',
+ 'adu': '%2F',
+ 'videoId': vod_id,
+ 'cc': user_country,
+ })
+
+ formats = []
+
+        streams = traverse_obj(vod_data, ('streams', ...)) or []
+        if len(streams) > 1:
+            self.report_warning('Multiple streams found. Only the first stream will be downloaded.')
+        stream = streams[0] if streams else {}
+
+ max_stream = max(
+ stream.get('videos') or [],
+ key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
+ if max_stream is not None:
+ params = {arg.get('name'): arg.get('value') for arg in stream.get('keys', []) if arg.get('type') == 'param'}
+ formats = self._extract_formats_from_m3u8(max_stream.get('source'), params, video_id) or []
+
+        # For parts of the project, plain MP4 files were archived as well
+ max_video = max(
+ traverse_obj(vod_data, ('videos', 'list', ...)),
+ key=lambda v: traverse_obj(v, ('bitrate', 'video'), default=0), default=None)
+ if max_video is not None:
+ video_url = self._WAYBACK_BASE_URL + max_video.get('source')
+            urlh = self._request_webpage(HEADRequest(video_url), video_id, errnote=False,
+                                         fatal=False, note='Check video availability')
+ if urlh:
+ formats.append({'url': video_url})
+
+ return {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(player_info, ('postDetail', 'post', {
+ 'title': ('officialVideo', 'title', {str}),
+ 'creator': ('author', 'nickname', {str}),
+ 'channel': ('channel', 'channelName', {str}),
+ 'channel_id': ('channel', 'channelCode', {str}),
+ 'duration': ('officialVideo', 'playTime', {int_or_none}),
+ 'view_count': ('officialVideo', 'playCount', {int_or_none}),
+ 'like_count': ('officialVideo', 'likeCount', {int_or_none}),
+ 'comment_count': ('officialVideo', 'commentCount', {int_or_none}),
+ 'timestamp': ('officialVideo', 'createdAt', {lambda x: int_or_none(x, scale=1000)}),
+ 'release_timestamp': ('officialVideo', 'willStartAt', {lambda x: int_or_none(x, scale=1000)}),
+ })),
+ **traverse_obj(vod_data, ('meta', {
+ 'uploader_id': ('user', 'id', {str}),
+ 'uploader': ('user', 'name', {str}),
+ 'uploader_url': ('user', 'url', {url_or_none}),
+ 'thumbnail': ('cover', 'source', {url_or_none}),
+ }), expected_type=lambda x: x or None),
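+            # rewrite each archived caption URL through the wayback identity endpoint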
+ **NaverBaseIE.process_subtitles(vod_data, lambda x: [self._WAYBACK_BASE_URL + x]),
+ }
--- a/yt_dlp/extractor/vlive.py
+++ /dev/null
-import itertools
-import json
-
-from .naver import NaverBaseIE
-from ..compat import (
- compat_HTTPError,
- compat_str,
-)
-from ..utils import (
- ExtractorError,
- int_or_none,
- LazyList,
- merge_dicts,
- str_or_none,
- strip_or_none,
- traverse_obj,
- try_get,
- urlencode_postdata,
- url_or_none,
-)
-
-
-class VLiveBaseIE(NaverBaseIE):
- _NETRC_MACHINE = 'vlive'
- _logged_in = False
-
- def _perform_login(self, username, password):
- if self._logged_in:
- return
- LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
- self._request_webpage(
- LOGIN_URL, None, note='Downloading login cookies')
-
- self._download_webpage(
- LOGIN_URL, None, note='Logging in',
- data=urlencode_postdata({'email': username, 'pwd': password}),
- headers={
- 'Referer': LOGIN_URL,
- 'Content-Type': 'application/x-www-form-urlencoded'
- })
-
- login_info = self._download_json(
- 'https://www.vlive.tv/auth/loginInfo', None,
- note='Checking login status',
- headers={'Referer': 'https://www.vlive.tv/home'})
-
- if not try_get(login_info, lambda x: x['message']['login'], bool):
- raise ExtractorError('Unable to log in', expected=True)
- VLiveBaseIE._logged_in = True
-
- def _call_api(self, path_template, video_id, fields=None, query_add={}, note=None):
- if note is None:
- note = 'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0]
- query = {'appId': '8c6cc7b45d2568fb668be6e05b6e5a3b', 'gcc': 'KR', 'platformType': 'PC'}
- if fields:
- query['fields'] = fields
- if query_add:
- query.update(query_add)
- try:
- return self._download_json(
- 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
- note, headers={'Referer': 'https://www.vlive.tv/'}, query=query)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
- self.raise_login_required(json.loads(e.cause.read().decode('utf-8'))['message'])
- raise
-
-
-class VLiveIE(VLiveBaseIE):
- IE_NAME = 'vlive'
- _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
- _TESTS = [{
- 'url': 'http://www.vlive.tv/video/1326',
- 'md5': 'cc7314812855ce56de70a06a27314983',
- 'info_dict': {
- 'id': '1326',
- 'ext': 'mp4',
- 'title': "Girl's Day's Broadcast",
- 'creator': "Girl's Day",
- 'view_count': int,
- 'uploader_id': 'muploader_a',
- 'upload_date': '20150817',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
- 'timestamp': 1439816449,
- 'like_count': int,
- 'channel': 'Girl\'s Day',
- 'channel_id': 'FDF27',
- 'comment_count': int,
- 'release_timestamp': 1439818140,
- 'release_date': '20150817',
- 'duration': 1014,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'http://www.vlive.tv/video/16937',
- 'info_dict': {
- 'id': '16937',
- 'ext': 'mp4',
- 'title': '첸백시 걍방',
- 'creator': 'EXO',
- 'view_count': int,
- 'subtitles': 'mincount:12',
- 'uploader_id': 'muploader_j',
- 'upload_date': '20161112',
- 'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
- 'timestamp': 1478923074,
- 'like_count': int,
- 'channel': 'EXO',
- 'channel_id': 'F94BD',
- 'comment_count': int,
- 'release_timestamp': 1478924280,
- 'release_date': '20161112',
- 'duration': 906,
- },
- 'params': {
- 'skip_download': True,
- },
- }, {
- 'url': 'https://www.vlive.tv/video/129100',
- 'md5': 'ca2569453b79d66e5b919e5d308bff6b',
- 'info_dict': {
- 'id': '129100',
- 'ext': 'mp4',
- 'title': '[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene',
- 'creator': 'BTS+',
- 'view_count': int,
- 'subtitles': 'mincount:10',
- },
- 'skip': 'This video is only available for CH+ subscribers',
- }, {
- 'url': 'https://www.vlive.tv/embed/1326',
- 'only_matching': True,
- }, {
- # works only with gcc=KR
- 'url': 'https://www.vlive.tv/video/225019',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vlive.tv/video/223906',
- 'info_dict': {
- 'id': '58',
- 'title': 'RUN BTS!'
- },
- 'playlist_mincount': 120
- }]
-
- def _real_extract(self, url):
- video_id = self._match_id(url)
-
- post = self._call_api(
- 'post/v1.0/officialVideoPost-%s', video_id,
- 'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId},playlist{playlistSeq,totalCount,name}')
-
- playlist_id = str_or_none(try_get(post, lambda x: x['playlist']['playlistSeq']))
- if not self._yes_playlist(playlist_id, video_id):
- video = post['officialVideo']
- return self._get_vlive_info(post, video, video_id)
-
- playlist_name = str_or_none(try_get(post, lambda x: x['playlist']['name']))
- playlist_count = str_or_none(try_get(post, lambda x: x['playlist']['totalCount']))
-
- playlist = self._call_api(
- 'playlist/v1.0/playlist-%s/posts', playlist_id, 'data', {'limit': playlist_count})
-
- entries = []
- for video_data in playlist['data']:
- video = video_data.get('officialVideo')
- video_id = str_or_none(video.get('videoSeq'))
- entries.append(self._get_vlive_info(video_data, video, video_id))
-
- return self.playlist_result(entries, playlist_id, playlist_name)
-
- def _get_vlive_info(self, post, video, video_id):
- def get_common_fields():
- channel = post.get('channel') or {}
- return {
- 'title': video.get('title'),
- 'creator': post.get('author', {}).get('nickname'),
- 'channel': channel.get('channelName'),
- 'channel_id': channel.get('channelCode'),
- 'duration': int_or_none(video.get('playTime')),
- 'view_count': int_or_none(video.get('playCount')),
- 'like_count': int_or_none(video.get('likeCount')),
- 'comment_count': int_or_none(video.get('commentCount')),
- 'timestamp': int_or_none(video.get('createdAt'), scale=1000),
- 'release_timestamp': int_or_none(traverse_obj(video, 'onAirStartAt', 'willStartAt'), scale=1000),
- 'thumbnail': video.get('thumb'),
- }
-
- video_type = video.get('type')
- if video_type == 'VOD':
- inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
- vod_id = video['vodId']
- info_dict = merge_dicts(
- get_common_fields(),
- self._extract_video_info(video_id, vod_id, inkey))
- thumbnail = video.get('thumb')
- if thumbnail:
- if not info_dict.get('thumbnails') and info_dict.get('thumbnail'):
- info_dict['thumbnails'] = [{'url': info_dict.pop('thumbnail')}]
- info_dict.setdefault('thumbnails', []).append({'url': thumbnail, 'preference': 1})
- return info_dict
- elif video_type == 'LIVE':
- status = video.get('status')
- if status == 'ON_AIR':
- stream_url = self._call_api(
- 'old/v3/live/%s/playInfo',
- video_id)['result']['adaptiveStreamUrl']
- formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
- info = get_common_fields()
- info.update({
- 'title': video['title'],
- 'id': video_id,
- 'formats': formats,
- 'is_live': True,
- })
- return info
- elif status == 'ENDED':
- raise ExtractorError(
- 'Uploading for replay. Please wait...', expected=True)
- elif status == 'RESERVED':
- raise ExtractorError('Coming soon!', expected=True)
- elif video.get('exposeStatus') == 'CANCEL':
- raise ExtractorError(
- 'We are sorry, but the live broadcast has been canceled.',
- expected=True)
- else:
- raise ExtractorError('Unknown status ' + status)
-
-
-class VLivePostIE(VLiveBaseIE):
- IE_NAME = 'vlive:post'
- _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
- _TESTS = [{
- # uploadType = SOS
- 'url': 'https://www.vlive.tv/post/1-20088044',
- 'info_dict': {
- 'id': '1-20088044',
- 'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
- 'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
- },
- 'playlist_count': 3,
- }, {
- # uploadType = V
- 'url': 'https://www.vlive.tv/post/1-20087926',
- 'info_dict': {
- 'id': '1-20087926',
- 'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
- },
- 'playlist_count': 1,
- }]
- _FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
-
- def _real_extract(self, url):
- post_id = self._match_id(url)
-
- post = self._call_api(
- 'post/v1.0/post-%s', post_id,
- 'attachments{video},officialVideo{videoSeq},plainBody,title')
-
- video_seq = str_or_none(try_get(
- post, lambda x: x['officialVideo']['videoSeq']))
- if video_seq:
- return self.url_result(
- 'http://www.vlive.tv/video/' + video_seq,
- VLiveIE.ie_key(), video_seq)
-
- title = post['title']
- entries = []
- for idx, video in enumerate(post['attachments']['video'].values()):
- video_id = video.get('videoId')
- if not video_id:
- continue
- upload_type = video.get('uploadType')
- upload_info = video.get('uploadInfo') or {}
- entry = None
- if upload_type == 'SOS':
- download = self._call_api(
- self._FVIDEO_TMPL % 'sosPlayInfo', video_id)['videoUrl']['download']
- formats = []
- for f_id, f_url in download.items():
- formats.append({
- 'format_id': f_id,
- 'url': f_url,
- 'height': int_or_none(f_id[:-1]),
- })
- entry = {
- 'formats': formats,
- 'id': video_id,
- 'thumbnail': upload_info.get('imageUrl'),
- }
- elif upload_type == 'V':
- vod_id = upload_info.get('videoId')
- if not vod_id:
- continue
- inkey = self._call_api(self._FVIDEO_TMPL % 'inKey', video_id)['inKey']
- entry = self._extract_video_info(video_id, vod_id, inkey)
- if entry:
- entry['title'] = '%s_part%s' % (title, idx)
- entries.append(entry)
- return self.playlist_result(
- entries, post_id, title, strip_or_none(post.get('plainBody')))
-
-
-class VLiveChannelIE(VLiveBaseIE):
- IE_NAME = 'vlive:channel'
- _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<channel_id>[0-9A-Z]+)(?:/board/(?P<posts_id>\d+))?'
- _TESTS = [{
- 'url': 'http://channels.vlive.tv/FCD4B',
- 'info_dict': {
- 'id': 'FCD4B',
- 'title': 'MAMAMOO',
- },
- 'playlist_mincount': 110
- }, {
- 'url': 'https://www.vlive.tv/channel/FCD4B',
- 'only_matching': True,
- }, {
- 'url': 'https://www.vlive.tv/channel/FCD4B/board/3546',
- 'info_dict': {
- 'id': 'FCD4B-3546',
- 'title': 'MAMAMOO - Star Board',
- },
- 'playlist_mincount': 880
- }]
-
- def _entries(self, posts_id, board_name):
- if board_name:
- posts_path = 'post/v1.0/board-%s/posts'
- query_add = {'limit': 100, 'sortType': 'LATEST'}
- else:
- posts_path = 'post/v1.0/channel-%s/starPosts'
- query_add = {'limit': 100}
-
- for page_num in itertools.count(1):
- video_list = self._call_api(
- posts_path, posts_id, 'channel{channelName},contentType,postId,title,url', query_add,
- note=f'Downloading playlist page {page_num}')
-
- for video in try_get(video_list, lambda x: x['data'], list) or []:
- video_id = str(video.get('postId'))
- video_title = str_or_none(video.get('title'))
- video_url = url_or_none(video.get('url'))
- if not all((video_id, video_title, video_url)) or video.get('contentType') != 'VIDEO':
- continue
- channel_name = try_get(video, lambda x: x['channel']['channelName'], compat_str)
- yield self.url_result(video_url, VLivePostIE.ie_key(), video_id, video_title, channel=channel_name)
-
- after = try_get(video_list, lambda x: x['paging']['nextParams']['after'], compat_str)
- if not after:
- break
- query_add['after'] = after
-
- def _real_extract(self, url):
- channel_id, posts_id = self._match_valid_url(url).groups()
-
- board_name = None
- if posts_id:
- board = self._call_api(
- 'board/v1.0/board-%s', posts_id, 'title,boardType')
- board_name = board.get('title') or 'Unknown'
- if board.get('boardType') not in ('STAR', 'VLIVE_PLUS'):
- raise ExtractorError(f'Board {board_name!r} is not supported', expected=True)
-
- entries = LazyList(self._entries(posts_id or channel_id, board_name))
- channel_name = entries[0]['channel']
-
- return self.playlist_result(
- entries,
- f'{channel_id}-{posts_id}' if posts_id else channel_id,
- f'{channel_name} - {board_name}' if channel_name and board_name else channel_name)