]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/idolplus.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / idolplus.py
1 from .common import InfoExtractor
2 from ..utils import traverse_obj, try_call, url_or_none
3
4
class IdolPlusIE(InfoExtractor):
    """Extractor for idolplus.com ``concert`` and ``contents?albumId=`` pages."""

    _VALID_URL = r'https?://(?:www\.)?idolplus\.com/z[us]/(?:concert/|contents/?\?(?:[^#]+&)?albumId=)(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://idolplus.com/zs/contents?albumId=M012077298PPV00',
        'md5': '2ace3f4661c943a2f7e79f0b88cea1e7',
        'info_dict': {
            'id': 'M012077298PPV00',
            'ext': 'mp4',
            'title': '[MultiCam] Aegyo on Top of Aegyo (IZ*ONE EATING TRIP)',
            'release_date': '20200707',
            'formats': 'count:65',
        },
        'params': {'format': '532-KIM_MINJU'},
    }, {
        'url': 'https://idolplus.com/zs/contents?albumId=M01232H058PPV00&catId=E9TX5',
        'info_dict': {
            'id': 'M01232H058PPV00',
            'ext': 'mp4',
            'title': 'YENA (CIRCLE CHART MUSIC AWARDS 2022 RED CARPET)',
            'release_date': '20230218',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        # live stream
        'url': 'https://idolplus.com/zu/contents?albumId=M012323174PPV00',
        'info_dict': {
            'id': 'M012323174PPV00',
            'ext': 'mp4',
            'title': 'Hanteo Music Awards 2022 DAY2',
            'release_date': '20230211',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://idolplus.com/zs/concert/M012323039PPV00',
        'info_dict': {
            'id': 'M012323039PPV00',
            'ext': 'mp4',
            'title': 'CIRCLE CHART MUSIC AWARDS 2022',
            'release_date': '20230218',
            'formats': 'count:5',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Build the info dict for the album id matched in *url*.

        Downloads the ``viewdata/ruleset/build`` JSON, searches its nested
        ``dataList`` entries for the one whose ``type`` is ``'player'``, and
        reads the m3u8 URL, captions, title and broadcast date from it.  When
        the player entry flags both ``has_cuesheet`` and ``is_omni_member`` as
        ``'Y'``, the cuesheet JSON is downloaded as well and each member's
        multicam stream is appended as additional formats whose ``format_id``
        is suffixed with the member name.

        Returns a dict with ``id``, ``title``, ``formats``, ``subtitles`` and
        ``release_date``.
        """
        video_id = self._match_id(url)
        data_list = traverse_obj(self._download_json(
            'https://idolplus.com/api/zs/viewdata/ruleset/build', video_id,
            headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
                'rulesetId': 'contents',
                'albumId': video_id,
                'distribute': 'PRD',
                'loggedIn': 'false',
                'region': 'zs',
                'countryGroup': '00010',
                'lang': 'en',
                'saId': '999999999998',
            }), ('data', 'viewData', ...))

        # Stack-based search through the nested `dataList` entries.  The
        # candidate is kept in its own variable so that `player_data` stays
        # empty when no player entry exists, instead of ending up bound to
        # whichever unrelated entry happened to be popped last.
        player_data = {}
        while data_list:
            entry = data_list.pop()
            if traverse_obj(entry, 'type') == 'player':
                player_data = entry
                break
            elif traverse_obj(entry, ('dataList', ...)):
                data_list += entry['dataList']

        formats = self._extract_m3u8_formats(traverse_obj(player_data, (
            'vodPlayerList', 'vodProfile', 0, 'vodServer', 0, 'video_url', {url_or_none})), video_id)

        subtitles = {}
        for caption in traverse_obj(player_data, ('vodPlayerList', 'caption')) or []:
            # Validate the caption URL the same way as the video URLs; an
            # entry without a usable URL would only yield a broken subtitle.
            caption_url = url_or_none(traverse_obj(caption, 'smi_url'))
            if not caption_url:
                continue
            subtitles.setdefault(traverse_obj(caption, 'lang') or 'und', []).append({
                'url': caption_url,
                'ext': 'vtt',
            })

        # Add member multicams as alternative formats
        if (traverse_obj(player_data, ('detail', 'has_cuesheet')) == 'Y'
                and traverse_obj(player_data, ('detail', 'is_omni_member')) == 'Y'):
            cuesheet = traverse_obj(self._download_json(
                'https://idolplus.com/gapi/contents/v1.0/content/cuesheet', video_id,
                'Downloading JSON metadata for member multicams',
                headers={'App_type': 'web', 'Country_Code': 'KR'}, query={
                    'ALBUM_ID': video_id,
                    'COUNTRY_GRP': '00010',
                    'LANG': 'en',
                    'SA_ID': '999999999998',
                    'COUNTRY_CODE': 'KR',
                }), ('data', 'cuesheet_item', 0))

            for member in traverse_obj(cuesheet, ('members', ...)):
                # `omni_view_index` is treated as 1-based (hence the `- 1`).
                index = try_call(lambda: int(member['omni_view_index']) - 1)
                # A missing/unparsable index cannot select a stream, and a
                # value <= 0 would become a negative list index that silently
                # wraps around to the end of `omni_view`.
                if index is None or index < 0:
                    continue
                member_video_url = traverse_obj(
                    cuesheet, ('omni_view', index, 'cdn_url', 0, 'url', {url_or_none}))
                if not member_video_url:
                    continue
                # Fall back to the 1-based index so that a member without a
                # name does not abort the whole extraction with a KeyError.
                member_name = str(traverse_obj(member, 'name') or index + 1)
                member_formats = self._extract_m3u8_formats(
                    member_video_url, video_id, note=f'Downloading m3u8 for multicam {member_name}')
                format_suffix = member_name.replace(' ', '_')
                for mf in member_formats:
                    mf['format_id'] = f'{mf["format_id"]}-{format_suffix}'
                formats.extend(member_formats)

        return {
            'id': video_id,
            'title': traverse_obj(player_data, ('detail', 'albumName')),
            'formats': formats,
            'subtitles': subtitles,
            'release_date': traverse_obj(player_data, ('detail', 'broadcastDate')),
        }