]>
Commit | Line | Data |
---|---|---|
a281beba | 1 | import base64 |
2 | import hashlib | |
3 | import hmac | |
14c3a980 | 4 | import itertools |
a281beba | 5 | import json |
c88debff | 6 | import re |
a281beba | 7 | import time |
e897bd82 | 8 | import urllib.parse |
c88debff | 9 | |
6b95b065 | 10 | from .common import InfoExtractor |
1cc79574 | 11 | from ..utils import ( |
14c3a980 | 12 | ExtractorError, |
c88debff | 13 | dict_get, |
b02b960c | 14 | int_or_none, |
c0caa805 | 15 | join_nonempty, |
14c3a980 | 16 | merge_dicts, |
a281beba | 17 | parse_iso8601, |
14c3a980 | 18 | traverse_obj, |
c88debff | 19 | try_get, |
14c3a980 | 20 | unified_timestamp, |
b02b960c | 21 | update_url_query, |
a281beba | 22 | url_or_none, |
6b95b065 JMF |
23 | ) |
24 | ||
25 | ||
c88debff RA |
class NaverBaseIE(InfoExtractor):
    # Shared helpers for the Naver extractors: caption processing, VOD
    # play-info extraction and the signed "now_web" API call.
    _CAPTION_EXT_RE = r'\.(?:ttml|vtt)'

    @staticmethod  # NB: Used in WeverseIE
    def process_subtitles(vod_data, process_url):
        """Build the subtitle dicts from the ``captions.list`` entries of *vod_data*.

        @param vod_data     Play-info mapping; captions are read from
                            ``vod_data['captions']['list']``.
        @param process_url  Callable taking a caption URL and returning an
                            iterable of URLs to register for that caption.
        @returns            ``{'subtitles': {...}, 'automatic_captions': {...}}``
                            mapping a language tag to a list of
                            ``{'url', 'name'}`` dicts.
        """
        ret = {'subtitles': {}, 'automatic_captions': {}}
        for caption in traverse_obj(vod_data, ('captions', 'list', ...)):
            caption_url = caption.get('source')
            if not caption_url:
                continue
            type_ = 'automatic_captions' if caption.get('type') == 'auto' else 'subtitles'
            lang = caption.get('locale') or join_nonempty('language', 'country', from_dict=caption) or 'und'
            if caption.get('type') == 'fan':
                # Fan captions may share a locale; append the first unused "_fanN" suffix
                lang += '_fan{}'.format(next(i for i in itertools.count(1) if f'{lang}_fan{i}' not in ret[type_]))
            ret[type_].setdefault(lang, []).extend({
                'url': sub_url,
                'name': join_nonempty('label', 'fanName', from_dict=caption, delim=' - '),
            } for sub_url in process_url(caption_url))
        return ret

    def _extract_video_info(self, video_id, vid, key):
        """Download the VOD play-info for *vid* and turn it into an info dict.

        @param video_id  ID used for the returned info dict and for logging.
        @param vid       Naver video ID appended to the play-info endpoint.
        @param key       Value sent as the ``key`` query parameter.
        @returns         Info dict with id, title, formats, thumbnail,
                         view count, uploader fields and subtitles.
        @raises KeyError if the response lacks ``meta`` or ``meta.subject``.
        """
        video_data = self._download_json(
            'http://play.rmcnmv.naver.com/vod/play/v2.0/' + vid,
            video_id, query={
                'key': key,
            })
        meta = video_data['meta']
        title = meta['subject']
        formats = []
        get_list = lambda x: try_get(video_data, lambda y: y[x + 's']['list'], list) or []

        def extract_formats(streams, stream_type, query=None):
            # `query=None` instead of a shared mutable `{}` default
            for stream in streams:
                stream_url = stream.get('source')
                if not stream_url:
                    continue
                stream_url = update_url_query(stream_url, query or {})
                # `or {}` also covers keys that are present but null in the JSON
                encoding_option = stream.get('encodingOption') or {}
                bitrate = stream.get('bitrate') or {}
                formats.append({
                    'format_id': '{}_{}'.format(stream.get('type') or stream_type, dict_get(encoding_option, ('name', 'id'))),
                    'url': stream_url,
                    'ext': 'mp4',
                    'width': int_or_none(encoding_option.get('width')),
                    'height': int_or_none(encoding_option.get('height')),
                    'vbr': int_or_none(bitrate.get('video')),
                    'abr': int_or_none(bitrate.get('audio')),
                    'filesize': int_or_none(stream.get('size')),
                    'protocol': 'm3u8_native' if stream_type == 'HLS' else None,
                })

        extract_formats(get_list('video'), 'H264')
        for stream_set in video_data.get('streams') or []:
            # Each stream set carries its own query parameters in `keys`
            query = {param['name']: param['value'] for param in stream_set.get('keys') or []}
            stream_type = stream_set.get('type')
            videos = stream_set.get('videos')
            if videos:
                extract_formats(videos, stream_type, query)
            elif stream_type == 'HLS':
                stream_url = stream_set.get('source')
                if not stream_url:
                    continue
                formats.extend(self._extract_m3u8_formats(
                    update_url_query(stream_url, query), video_id,
                    'mp4', 'm3u8_native', m3u8_id=stream_type, fatal=False))

        replace_ext = lambda x, y: re.sub(self._CAPTION_EXT_RE, '.' + y, x)

        def get_subs(caption_url):
            # Offer both the TTML and the VTT variant when the URL has either extension
            if re.search(self._CAPTION_EXT_RE, caption_url):
                return [
                    replace_ext(caption_url, 'ttml'),
                    replace_ext(caption_url, 'vtt'),
                ]
            return [caption_url]

        user = meta.get('user') or {}

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': try_get(meta, lambda x: x['cover']['source']),
            'view_count': int_or_none(meta.get('count')),
            'uploader_id': user.get('id'),
            'uploader': user.get('name'),
            'uploader_url': user.get('url'),
            **self.process_subtitles(video_data, get_subs),
        }

    def _call_api(self, path, video_id):
        """Call the signed ``now_web_api`` endpoint at *path* and return its ``result``.

        The request is signed with ``msgpad`` (current time in milliseconds)
        and ``md``, the base64-encoded HMAC-SHA1 of the first 255 characters
        of the endpoint URL followed by ``msgpad``.

        @param path      API path (including any query string) appended to the base URL.
        @param video_id  ID used for logging by the downloader.
        @returns         The ``result`` member of the JSON response.
        """
        api_endpoint = f'https://apis.naver.com/now_web2/now_web_api/v1{path}'
        key = b'nbxvs5nwNG9QKEWK0ADjYA4JZoujF4gHcIwvoCxFTPAeamq5eemvt5IWAYXxrbYM'
        msgpad = int(time.time() * 1000)
        md = base64.b64encode(hmac.HMAC(
            key, f'{api_endpoint[:255]}{msgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()

        return self._download_json(api_endpoint, video_id=video_id, headers=self.geo_verification_headers(), query={
            'msgpad': msgpad,
            'md': md,
        })['result']
129 | ||
c88debff RA |
130 | |
class NaverIE(NaverBaseIE):
    # Clips on tv.naver.com / tvcast.naver.com (``/v/<id>`` and ``/embed/<id>``)
    _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/(?:v|embed)/(?P<id>\d+)'
    _GEO_BYPASS = False
    _TESTS = [
        {
            'url': 'http://tv.naver.com/v/81652',
            'info_dict': {
                'id': '81652',
                'ext': 'mp4',
                'title': '[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
                'description': '메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
                'timestamp': 1378200754,
                'upload_date': '20130903',
                'uploader': '메가스터디, 합격불변의 법칙',
                'uploader_id': 'megastudy',
                'uploader_url': 'https://tv.naver.com/megastudy',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'duration': 2118,
                'thumbnail': r're:^https?://.*\.jpg',
            },
        },
        {
            'url': 'http://tv.naver.com/v/395837',
            'md5': '7791205fa89dbed2f5e3eb16d287ff05',
            'info_dict': {
                'id': '395837',
                'ext': 'mp4',
                'title': '9년이 지나도 아픈 기억, 전효성의 아버지',
                'description': 'md5:c76be23e21403a6473d8119678cdb5cb',
                'timestamp': 1432030253,
                'upload_date': '20150519',
                'uploader': '4가지쇼',
                'uploader_id': '4show',
                'uploader_url': 'https://tv.naver.com/4show',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'duration': 277,
                'thumbnail': r're:^https?://.*\.jpg',
            },
        },
        {
            'url': 'http://tvcast.naver.com/v/81652',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Fetch the clip's play-info, then merge the VOD info with the clip metadata."""
        video_id = self._match_id(url)
        play_info = self._call_api(f'/clips/{video_id}/play-info', video_id)

        vid = traverse_obj(play_info, ('clip', 'videoId', {str}))
        in_key = traverse_obj(play_info, ('play', 'inKey', {str}))
        # Both values are needed to query the VOD play-info endpoint
        if not (vid and in_key):
            raise ExtractorError('Unable to extract video info')

        vod_info = self._extract_video_info(video_id, vid, in_key)
        clip_fields = traverse_obj(play_info, ('clip', {
            'title': 'title',
            'description': 'description',
            'timestamp': ('firstExposureDatetime', {parse_iso8601}),
            'duration': ('playTime', {int_or_none}),
            'like_count': ('likeItCount', {int_or_none}),
            'view_count': ('playCount', {int_or_none}),
            'comment_count': ('commentCount', {int_or_none}),
            'thumbnail': ('thumbnailImageUrl', {url_or_none}),
            'uploader': 'channelName',
            'uploader_id': 'channelId',
            'uploader_url': ('channelUrl', {url_or_none}),
            'age_limit': ('adultVideo', {lambda x: 19 if x else None}),
        }))
        # Clip-level metadata takes precedence over the VOD play-info values
        return {**vod_info, **clip_fields}
217e5173 S |
202 | |
203 | ||
class NaverLiveIE(NaverBaseIE):
    # Live streams on tv.naver.com / tvcast.naver.com (``/l/<id>``)
    IE_NAME = 'Naver:live'
    _VALID_URL = r'https?://(?:m\.)?tv(?:cast)?\.naver\.com/l/(?P<id>\d+)'
    _GEO_BYPASS = False
    _TESTS = [
        {
            'url': 'https://tv.naver.com/l/127062',
            'info_dict': {
                'id': '127062',
                'ext': 'mp4',
                'live_status': 'is_live',
                'channel': '뉴스는 YTN',
                'channel_id': 'ytnnews24',
                'title': 're:^대한민국 24시간 뉴스 채널 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                'description': 'md5:f938b5956711beab6f882314ffadf4d5',
                'start_time': 1677752280,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
                'like_count': int,
            },
        },
        {
            'url': 'https://tv.naver.com/l/140535',
            'info_dict': {
                'id': '140535',
                'ext': 'mp4',
                'live_status': 'is_live',
                'channel': 'KBS뉴스',
                'channel_id': 'kbsnews',
                'start_time': 1696867320,
                'title': 're:^언제 어디서나! KBS 뉴스 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                'description': 'md5:6ad419c0bf2f332829bda3f79c295284',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)',
                'like_count': int,
            },
        },
        {
            'url': 'https://tv.naver.com/l/54887',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Return the info dict of a live stream; raise unless its status is ``OPENED``."""
        video_id = self._match_id(url)
        data = self._call_api(f'/live-end/normal/{video_id}/play-info?renewLastPlayDate=true', video_id)

        status = traverse_obj(data, ('live', 'liveStatus'))
        if status == 'CLOSED':
            raise ExtractorError('Stream is offline.', expected=True)
        if status != 'OPENED':
            raise ExtractorError(f'Unknown status {status!r}')

        # `playbackBody` is a JSON document embedded as a string in the response
        playlist_url = traverse_obj(data, ('playbackBody', {json.loads}, 'media', 0, 'path'))
        live_fields = traverse_obj(data, ('live', {
            'title': 'title',
            'channel': 'channelName',
            'channel_id': 'channelId',
            'description': 'description',
            'like_count': (('likeCount', 'likeItCount'), {int_or_none}),
            'thumbnail': ('thumbnailImageUrl', {url_or_none}),
            'start_time': (('startTime', 'startDateTime', 'startYmdt'), {parse_iso8601}),
        }), get_all=False)

        return {
            'id': video_id,
            'formats': self._extract_m3u8_formats(playlist_url, video_id, live=True),
            **live_fields,
            'is_live': True,
        }
14c3a980 | 266 | |
267 | ||
class NaverNowIE(NaverBaseIE):
    # Shows on now.naver.com: single replays, single highlights, or the whole
    # show as a playlist of replays followed by highlights.
    IE_NAME = 'navernow'
    _VALID_URL = r'https?://now\.naver\.com/s/now\.(?P<id>\w+)'
    _API_URL = 'https://apis.naver.com/now_web/oldnow_web/v4'
    _TESTS = [
        {
            'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay=',
            'md5': 'e05854162c21c221481de16b2944a0bc',
            'info_dict': {
                'id': '4759-26331132',
                'title': '아이키X노제\r\n💖꽁냥꽁냥💖(1)',
                'ext': 'mp4',
                'thumbnail': r're:^https?://.*\.jpg',
                'timestamp': 1650369600,
                'upload_date': '20220419',
                'uploader_id': 'now',
                'view_count': int,
                'uploader_url': 'https://now.naver.com/show/4759',
                'uploader': '아이키의 떰즈업',
            },
            'params': {
                'noplaylist': True,
            },
        },
        {
            'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
            'md5': '9f6118e398aa0f22b2152f554ea7851b',
            'info_dict': {
                'id': '4759-26601461',
                'title': '아이키: 나 리정한테 흔들렸어,,, 질투 폭발하는 노제 여보😾 [아이키의 떰즈업]ㅣ네이버 NOW.',
                'ext': 'mp4',
                'thumbnail': r're:^https?://.*\.jpg',
                'upload_date': '20220504',
                'timestamp': 1651648311,
                'uploader_id': 'now',
                'view_count': int,
                'uploader_url': 'https://now.naver.com/show/4759',
                'uploader': '아이키의 떰즈업',
            },
            'params': {
                'noplaylist': True,
            },
        },
        {
            'url': 'https://now.naver.com/s/now.4759',
            'info_dict': {
                'id': '4759',
                'title': '아이키의 떰즈업',
            },
            'playlist_mincount': 101,
        },
        {
            'url': 'https://now.naver.com/s/now.4759?shareReplayId=26331132#replay',
            'info_dict': {
                'id': '4759',
                'title': '아이키의 떰즈업',
            },
            'playlist_mincount': 101,
        },
        {
            'url': 'https://now.naver.com/s/now.4759?shareHightlight=26601461#highlight=',
            'info_dict': {
                'id': '4759',
                'title': '아이키의 떰즈업',
            },
            'playlist_mincount': 101,
        },
        {
            'url': 'https://now.naver.com/s/now.kihyunplay?shareReplayId=30573291#replay',
            'only_matching': True,
        },
    ]

    def _extract_replay(self, show_id, replay_id):
        """Return the info dict of one replay, identified as ``<show_id>-<replay_id>``."""
        vod_info = self._download_json(
            f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}', replay_id)
        inkey_response = self._download_json(
            f'{self._API_URL}/shows/now.{show_id}/vod/{replay_id}/inkey', replay_id)
        in_key = inkey_response['inKey']

        episode_fields = {
            'id': f'{show_id}-{replay_id}',
            'title': traverse_obj(vod_info, ('episode', 'title')),
            'timestamp': unified_timestamp(traverse_obj(vod_info, ('episode', 'start_time'))),
            'thumbnail': vod_info.get('thumbnail_image_url'),
        }
        return merge_dicts(
            episode_fields,
            self._extract_video_info(replay_id, vod_info['video_id'], in_key))

    def _extract_show_replays(self, show_id):
        """Yield the info dict of every replay of the show, fetching the list page by page."""
        page_size = 15
        for page in itertools.count(1):
            result = self._download_json(
                f'{self._API_URL}/vod-shows/now.{show_id}', show_id,
                query={'page': page, 'page_size': page_size},
                note=f'Downloading JSON vod list for show {show_id} - page {page}',
            )['response']['result']

            vods = result.get('vod_list') or []
            for vod in vods:
                yield self._extract_replay(show_id, vod['id'])

            # A page shorter than the requested size is the last one
            if len(vods) < page_size:
                return

    def _extract_show_highlights(self, show_id, highlight_id=None):
        """Yield the show's highlights; only the one matching *highlight_id* when it is given."""
        page_size = 10
        for page in itertools.count(1):
            response = self._download_json(
                f'{self._API_URL}/shows/now.{show_id}/highlights/videos/', show_id,
                query={'page': page, 'page_size': page_size},
                note=f'Downloading JSON highlights for show {show_id} - page {page}')

            entries = response.get('results') or []
            for highlight in entries:
                if highlight_id and highlight.get('clip_no') != int(highlight_id):
                    continue
                highlight_fields = {
                    'id': f'{show_id}-{highlight["clip_no"]}',
                    'title': highlight.get('title'),
                    'timestamp': unified_timestamp(highlight.get('regdate')),
                    'thumbnail': highlight.get('thumbnail_url'),
                }
                yield merge_dicts(highlight_fields, self._extract_video_info(
                    highlight['clip_no'], highlight['video_id'], highlight['video_inkey']))

            # A page shorter than the requested size is the last one
            if len(entries) < page_size:
                return

    def _extract_highlight(self, show_id, highlight_id):
        """Return the single highlight *highlight_id*; raise ExtractorError if it is not listed."""
        matching = self._extract_show_highlights(show_id, highlight_id)
        try:
            return next(matching)
        except StopIteration:
            raise ExtractorError(f'Unable to find highlight {highlight_id} for show {show_id}')

    def _real_extract(self, url):
        """Extract a single highlight/replay named in the query string, else the whole show."""
        show_id = self._match_id(url)
        qs = urllib.parse.parse_qs(urllib.parse.urlparse(url).query)
        highlight_ids = qs.get('shareHightlight')
        replay_ids = qs.get('shareReplayId')

        if not self._yes_playlist(show_id, highlight_ids):
            return self._extract_highlight(show_id, qs['shareHightlight'][0])
        elif not self._yes_playlist(show_id, replay_ids):
            return self._extract_replay(show_id, qs['shareReplayId'][0])

        show_info = self._download_json(
            f'{self._API_URL}/shows/now.{show_id}/', show_id,
            note=f'Downloading JSON vod list for show {show_id}')

        entries = itertools.chain(
            self._extract_show_replays(show_id),
            self._extract_show_highlights(show_id))
        return self.playlist_result(entries, show_id, show_info.get('title'))