]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/niconico.py
[niconico] Rewrite NiconicoIE (#3018)
[yt-dlp.git] / yt_dlp / extractor / niconico.py
CommitLineData
dcdb292f 1# coding: utf-8
214c22c7 2from __future__ import unicode_literals
52ad14ae 3
fb198a8a 4import datetime
f494ddad 5import functools
7bdcb4a4 6import itertools
abafce59 7import json
8import re
7bdcb4a4 9import time
52ad14ae 10
abafce59 11from .common import InfoExtractor, SearchInfoExtractor
1cc79574 12from ..compat import (
bb139491 13 compat_parse_qs,
29f7c58a 14 compat_urllib_parse_urlparse,
f494ddad 15 compat_HTTPError,
1cc79574
PH
16)
17from ..utils import (
6110bbbf 18 ExtractorError,
7bdcb4a4
LNO
19 OnDemandPagedList,
20 bug_reports_message,
21 clean_html,
fb198a8a 22 float_or_none,
b868936c 23 int_or_none,
7bdcb4a4 24 join_nonempty,
1cc79574 25 parse_duration,
7bdcb4a4 26 parse_filesize,
bb865f3a 27 parse_iso8601,
ee6a6116 28 remove_start,
f494ddad 29 traverse_obj,
463e7216 30 try_get,
f494ddad 31 unescapeHTML,
7bdcb4a4
LNO
32 update_url_query,
33 url_or_none,
6e6bc8da 34 urlencode_postdata,
52ad14ae
TT
35)
36
13ebea79 37
52ad14ae 38class NiconicoIE(InfoExtractor):
214c22c7
JMF
39 IE_NAME = 'niconico'
40 IE_DESC = 'ニコニコ動画'
52ad14ae 41
1c9a1457 42 _TESTS = [{
214c22c7 43 'url': 'http://www.nicovideo.jp/watch/sm22312215',
7bdcb4a4 44 'md5': 'd1a75c0823e2f629128c43e1212760f9',
214c22c7
JMF
45 'info_dict': {
46 'id': 'sm22312215',
47 'ext': 'mp4',
48 'title': 'Big Buck Bunny',
463e7216 49 'thumbnail': r're:https?://.*',
214c22c7
JMF
50 'uploader': 'takuya0301',
51 'uploader_id': '2698420',
52 'upload_date': '20131123',
aaab8c5e 53 'timestamp': int, # timestamp is unstable
214c22c7 54 'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
15ce1338 55 'duration': 33,
463e7216
YCH
56 'view_count': int,
57 'comment_count': int,
52ad14ae 58 },
8e4988f1 59 'skip': 'Requires an account',
1c9a1457 60 }, {
59d814f7
YCH
61 # File downloaded with and without credentials are different, so omit
62 # the md5 field
1c9a1457 63 'url': 'http://www.nicovideo.jp/watch/nm14296458',
1c9a1457
S
64 'info_dict': {
65 'id': 'nm14296458',
66 'ext': 'swf',
67 'title': '【鏡音リン】Dance on media【オリジナル】take2!',
bb865f3a 68 'description': 'md5:689f066d74610b3b22e0f1739add0f58',
463e7216 69 'thumbnail': r're:https?://.*',
1c9a1457
S
70 'uploader': 'りょうた',
71 'uploader_id': '18822557',
72 'upload_date': '20110429',
bb865f3a 73 'timestamp': 1304065916,
1c9a1457
S
74 'duration': 209,
75 },
8e4988f1 76 'skip': 'Requires an account',
bb865f3a
YCH
77 }, {
78 # 'video exists but is marked as "deleted"
b2e8e7da 79 # md5 is unstable
bb865f3a 80 'url': 'http://www.nicovideo.jp/watch/sm10000',
bb865f3a
YCH
81 'info_dict': {
82 'id': 'sm10000',
83 'ext': 'unknown_video',
84 'description': 'deleted',
85 'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
463e7216 86 'thumbnail': r're:https?://.*',
b2e8e7da 87 'upload_date': '20071224',
8e4988f1 88 'timestamp': int, # timestamp field has different value if logged in
b2e8e7da 89 'duration': 304,
463e7216 90 'view_count': int,
bb865f3a 91 },
8e4988f1 92 'skip': 'Requires an account',
621ffe7b
YCH
93 }, {
94 'url': 'http://www.nicovideo.jp/watch/so22543406',
95 'info_dict': {
96 'id': '1388129933',
97 'ext': 'mp4',
98 'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
99 'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
463e7216 100 'thumbnail': r're:https?://.*',
621ffe7b
YCH
101 'timestamp': 1388851200,
102 'upload_date': '20140104',
103 'uploader': 'アニメロチャンネル',
104 'uploader_id': '312',
8e4988f1
YCH
105 },
106 'skip': 'The viewing period of the video you were searching for has expired.',
463e7216 107 }, {
ee6a6116 108 # video not available via `getflv`; "old" HTML5 video
463e7216 109 'url': 'http://www.nicovideo.jp/watch/sm1151009',
ee6a6116 110 'md5': '8fa81c364eb619d4085354eab075598a',
463e7216
YCH
111 'info_dict': {
112 'id': 'sm1151009',
ee6a6116 113 'ext': 'mp4',
463e7216
YCH
114 'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
115 'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
116 'thumbnail': r're:https?://.*',
117 'duration': 184,
118 'timestamp': 1190868283,
119 'upload_date': '20070927',
120 'uploader': 'denden2',
121 'uploader_id': '1392194',
122 'view_count': int,
123 'comment_count': int,
124 },
125 'skip': 'Requires an account',
ee6a6116
YCH
126 }, {
127 # "New" HTML5 video
aaab8c5e 128 # md5 is unstable
ee6a6116 129 'url': 'http://www.nicovideo.jp/watch/sm31464864',
ee6a6116
YCH
130 'info_dict': {
131 'id': 'sm31464864',
132 'ext': 'mp4',
133 'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
134 'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
135 'timestamp': 1498514060,
136 'upload_date': '20170626',
aaab8c5e 137 'uploader': 'ゲスト',
ee6a6116
YCH
138 'uploader_id': '40826363',
139 'thumbnail': r're:https?://.*',
140 'duration': 198,
141 'view_count': int,
142 'comment_count': int,
143 },
144 'skip': 'Requires an account',
aaab8c5e
PP
145 }, {
146 # Video without owner
147 'url': 'http://www.nicovideo.jp/watch/sm18238488',
148 'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
149 'info_dict': {
150 'id': 'sm18238488',
151 'ext': 'mp4',
152 'title': '【実写版】ミュータントタートルズ',
153 'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
154 'timestamp': 1341160408,
155 'upload_date': '20120701',
156 'uploader': None,
157 'uploader_id': None,
158 'thumbnail': r're:https?://.*',
159 'duration': 5271,
160 'view_count': int,
161 'comment_count': int,
162 },
163 'skip': 'Requires an account',
4a87de72
LS
164 }, {
165 'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
166 'only_matching': True,
7bdcb4a4
LNO
167 }, {
168 'note': 'a video that is only served as an ENCRYPTED HLS.',
169 'url': 'https://www.nicovideo.jp/watch/so38016254',
170 'only_matching': True,
1c9a1457 171 }]
52ad14ae 172
7bdcb4a4 173 _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
52ad14ae 174 _NETRC_MACHINE = 'niconico'
7bdcb4a4
LNO
175 _COMMENT_API_ENDPOINTS = (
176 'https://nvcomment.nicovideo.jp/legacy/api.json',
177 'https://nmsg.nicovideo.jp/api.json',)
2291dbce 178 _API_HEADERS = {
179 'X-Frontend-ID': '6',
7bdcb4a4
LNO
180 'X-Frontend-Version': '0',
181 'X-Niconico-Language': 'en-us',
182 'Referer': 'https://www.nicovideo.jp/',
183 'Origin': 'https://www.nicovideo.jp',
2291dbce 184 }
185
52ad14ae 186 def _real_initialize(self):
23d83ad4 187 self._login()
52ad14ae
TT
188
189 def _login(self):
68217024 190 username, password = self._get_login_info()
23d83ad4
NJ
191 # No authentication to be performed
192 if not username:
193 return True
52ad14ae
TT
194
195 # Log in
bb139491 196 login_ok = True
52ad14ae 197 login_form_strs = {
bb139491 198 'mail_tel': username,
214c22c7 199 'password': password,
52ad14ae 200 }
7bdcb4a4
LNO
201 self._request_webpage(
202 'https://account.nicovideo.jp/login', None,
203 note='Acquiring Login session')
bb139491 204 urlh = self._request_webpage(
7bdcb4a4 205 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
bb139491 206 note='Logging in', errnote='Unable to log in',
7bdcb4a4
LNO
207 data=urlencode_postdata(login_form_strs),
208 headers={
209 'Referer': 'https://account.nicovideo.jp/login',
210 'Content-Type': 'application/x-www-form-urlencoded',
211 })
bb139491
YCH
212 if urlh is False:
213 login_ok = False
214 else:
29f7c58a 215 parts = compat_urllib_parse_urlparse(urlh.geturl())
bb139491
YCH
216 if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
217 login_ok = False
218 if not login_ok:
6a39ee13 219 self.report_warning('unable to log in: bad username or password')
bb139491 220 return login_ok
52ad14ae 221
fb198a8a 222 def _get_heartbeat_info(self, info_dict):
fb198a8a 223 video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
7bdcb4a4 224 dmc_protocol = info_dict['_expected_protocol']
ee6a6116 225
2291dbce 226 api_data = (
227 info_dict.get('_api_data')
228 or self._parse_json(
229 self._html_search_regex(
230 'data-api-data="([^"]+)"',
231 self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
232 'API data', default='{}'),
233 video_id))
fb198a8a 234
7978e172 235 session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
fb198a8a 236 session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
237
2291dbce 238 def ping():
7bdcb4a4
LNO
239 tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
240 if tracking_id:
241 tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
242 watch_request_response = self._download_json(
243 tracking_url, video_id,
244 note='Acquiring permission for downloading video', fatal=False,
245 headers=self._API_HEADERS)
246 if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
247 self.report_warning('Failed to acquire permission for playing video. Video download may fail.')
fb198a8a 248
249 yesno = lambda x: 'yes' if x else 'no'
250
7bdcb4a4
LNO
251 if dmc_protocol == 'http':
252 protocol = 'http'
253 protocol_parameters = {
254 'http_output_download_parameters': {
255 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
256 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
257 }
258 }
259 elif dmc_protocol == 'hls':
fb198a8a 260 protocol = 'm3u8'
7bdcb4a4
LNO
261 segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
262 parsed_token = self._parse_json(session_api_data['token'], video_id)
263 encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
264 protocol_parameters = {
265 'hls_parameters': {
266 'segment_duration': segment_duration,
267 'transfer_preset': '',
268 'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
269 'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
fb198a8a 270 }
271 }
7bdcb4a4
LNO
272 if 'hls_encryption' in parsed_token and encryption:
273 protocol_parameters['hls_parameters']['encryption'] = {
274 parsed_token['hls_encryption']: {
275 'encrypted_key': encryption['encryptedKey'],
276 'key_uri': encryption['keyUri'],
fb198a8a 277 }
278 }
7bdcb4a4
LNO
279 else:
280 protocol = 'm3u8_native'
281 else:
282 raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')
ee6a6116
YCH
283
284 session_response = self._download_json(
285 session_api_endpoint['url'], video_id,
286 query={'_format': 'json'},
287 headers={'Content-Type': 'application/json'},
fb198a8a 288 note='Downloading JSON metadata for %s' % info_dict['format_id'],
ee6a6116
YCH
289 data=json.dumps({
290 'session': {
291 'client_info': {
7978e172 292 'player_id': session_api_data.get('playerId'),
ee6a6116
YCH
293 },
294 'content_auth': {
7978e172 295 'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
296 'content_key_timeout': session_api_data.get('contentKeyTimeout'),
ee6a6116 297 'service_id': 'nicovideo',
7978e172 298 'service_user_id': session_api_data.get('serviceUserId')
ee6a6116 299 },
7978e172 300 'content_id': session_api_data.get('contentId'),
ee6a6116
YCH
301 'content_src_id_sets': [{
302 'content_src_ids': [{
303 'src_id_to_mux': {
fb198a8a 304 'audio_src_ids': [audio_src_id],
305 'video_src_ids': [video_src_id],
ee6a6116
YCH
306 }
307 }]
308 }],
309 'content_type': 'movie',
310 'content_uri': '',
311 'keep_method': {
312 'heartbeat': {
7978e172 313 'lifetime': session_api_data.get('heartbeatLifetime')
ee6a6116
YCH
314 }
315 },
7bdcb4a4 316 'priority': session_api_data['priority'],
ee6a6116
YCH
317 'protocol': {
318 'name': 'http',
319 'parameters': {
7bdcb4a4
LNO
320 'http_parameters': {
321 'parameters': protocol_parameters
322 }
ee6a6116
YCH
323 }
324 },
7978e172 325 'recipe_id': session_api_data.get('recipeId'),
ee6a6116
YCH
326 'session_operation_auth': {
327 'session_operation_auth_by_signature': {
fb198a8a 328 'signature': session_api_data.get('signature'),
329 'token': session_api_data.get('token'),
ee6a6116
YCH
330 }
331 },
332 'timing_constraint': 'unlimited'
333 }
4d59db5b 334 }).encode())
ee6a6116 335
fb198a8a 336 info_dict['url'] = session_response['data']['session']['content_uri']
337 info_dict['protocol'] = protocol
338
339 # get heartbeat info
340 heartbeat_info_dict = {
341 'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
342 'data': json.dumps(session_response['data']),
343 # interval, convert milliseconds to seconds, then halve to make a buffer.
2291dbce 344 'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
345 'ping': ping
fb198a8a 346 }
347
348 return info_dict, heartbeat_info_dict
349
7bdcb4a4
LNO
350 def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
351
352 if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
353 return None
354
355 def extract_video_quality(video_quality):
356 return parse_filesize('%sB' % self._search_regex(
357 r'\| ([0-9]*\.?[0-9]*[MK])', video_quality, 'vbr', default=''))
358
359 format_id = '-'.join(
360 [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])
361
362 vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
363 vid_quality = traverse_obj(video_quality, ('metadata', 'bitrate'))
ee6a6116
YCH
364
365 return {
7bdcb4a4 366 'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']),
ee6a6116 367 'format_id': format_id,
7bdcb4a4 368 'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
ee6a6116 369 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
7bdcb4a4
LNO
370 'acodec': 'aac',
371 'vcodec': 'h264',
372 'abr': float_or_none(traverse_obj(audio_quality, ('metadata', 'bitrate')), 1000),
373 'vbr': float_or_none(vid_quality if vid_quality > 0 else extract_video_quality(vid_qual_label), 1000),
374 'height': traverse_obj(video_quality, ('metadata', 'resolution', 'height')),
375 'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
376 'quality': -2 if 'low' in video_quality['id'] else None,
377 'protocol': 'niconico_dmc',
378 '_expected_protocol': dmc_protocol,
fb198a8a 379 'http_headers': {
380 'Origin': 'https://www.nicovideo.jp',
381 'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
382 }
ee6a6116
YCH
383 }
384
52ad14ae 385 def _real_extract(self, url):
937daef4 386 video_id = self._match_id(url)
52ad14ae 387
7bdcb4a4
LNO
388 try:
389 webpage, handle = self._download_webpage_handle(
390 'http://www.nicovideo.jp/watch/' + video_id, video_id)
391 if video_id.startswith('so'):
392 video_id = self._match_id(handle.geturl())
fb198a8a 393
7bdcb4a4
LNO
394 api_data = self._parse_json(self._html_search_regex(
395 'data-api-data="([^"]+)"', webpage,
396 'API data', default='{}'), video_id)
397 except ExtractorError as e:
398 try:
399 api_data = self._download_json(
400 'https://www.nicovideo.jp/api/watch/v3/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
401 note='Downloading API JSON', errnote='Unable to fetch data')['data']
402 except ExtractorError:
403 if not isinstance(e.cause, compat_HTTPError):
404 raise
405 webpage = e.cause.read().decode('utf-8', 'replace')
406 error_msg = self._html_search_regex(
407 r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
408 webpage, 'error reason', default=None)
409 if not error_msg:
410 raise
411 raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True)
7978e172 412
7bdcb4a4 413 formats = []
7978e172 414
7bdcb4a4
LNO
415 def get_video_info(*items, get_first=True, **kwargs):
416 return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)
7978e172 417
7bdcb4a4
LNO
418 quality_info = api_data['media']['delivery']['movie']
419 session_api_data = quality_info['session']
420 for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']):
421 fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol)
422 if fmt:
423 formats.append(fmt)
fb198a8a 424
fb198a8a 425 self._sort_formats(formats)
ee6a6116 426
52ad14ae 427 # Start extracting information
7bdcb4a4
LNO
428 tags = None
429 if webpage:
430 # use og:video:tag (not logged in)
431 og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
432 tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
433 if not tags:
434 # use keywords and split with comma (not logged in)
435 kwds = self._html_search_meta('keywords', webpage, default=None)
436 if kwds:
437 tags = [x for x in kwds.split(',') if x]
438 if not tags:
439 # find in json (logged in)
440 tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
52ad14ae 441
b2e8e7da 442 return {
214c22c7 443 'id': video_id,
2291dbce 444 '_api_data': api_data,
7bdcb4a4 445 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
ee6a6116 446 'formats': formats,
7bdcb4a4
LNO
447 'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta(
448 ('image', 'og:image'), webpage, 'thumbnail', default=None),
449 'description': clean_html(get_video_info('description')),
450 'uploader': traverse_obj(api_data, ('owner', 'nickname')),
451 'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
452 self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
453 'uploader_id': traverse_obj(api_data, ('owner', 'id')),
454 'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
455 'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
456 'view_count': int_or_none(get_video_info('count', 'view')),
457 'tags': tags,
458 'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
459 'comment_count': get_video_info('count', 'comment', expected_type=int),
460 'duration': (
461 parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
462 or get_video_info('duration')),
463 'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
464 'subtitles': self.extract_subtitles(video_id, api_data, session_api_data),
52ad14ae 465 }
a9bad429 466
7bdcb4a4
LNO
467 def _get_subtitles(self, video_id, api_data, session_api_data):
468 comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
469 user_id_str = session_api_data.get('serviceUserId')
470
471 thread_ids = [x for x in traverse_obj(api_data, ('comment', 'threads')) or [] if x['isActive']]
472 raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key)
473 if not raw_danmaku:
474 self.report_warning(f'Failed to get comments. {bug_reports_message()}')
475 return
476 return {
477 'comments': [{
478 'ext': 'json',
479 'data': json.dumps(raw_danmaku),
480 }],
481 }
482
483 def _extract_all_comments(self, video_id, threads, user_id, user_key):
484 auth_data = {
485 'user_id': user_id,
486 'userkey': user_key,
487 } if user_id and user_key else {'user_id': ''}
488
489 # Request Start
490 post_data = [{'ping': {'content': 'rs:0'}}]
491 for i, thread in enumerate(threads):
492 thread_id = thread['id']
493 thread_fork = thread['fork']
494 # Post Start (2N)
495 post_data.append({'ping': {'content': f'ps:{i * 2}'}})
496 post_data.append({'thread': {
497 'fork': thread_fork,
498 'language': 0,
499 'nicoru': 3,
500 'scores': 1,
501 'thread': thread_id,
502 'version': '20090904',
503 'with_global': 1,
504 **auth_data,
505 }})
506 # Post Final (2N)
507 post_data.append({'ping': {'content': f'pf:{i * 2}'}})
508
509 # Post Start (2N+1)
510 post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
511 post_data.append({'thread_leaves': {
512 # format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments'
513 # unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
514 'content': '0-999999:999999,999999,nicoru:999999',
515 'fork': thread_fork,
516 'language': 0,
517 'nicoru': 3,
518 'scores': 1,
519 'thread': thread_id,
520 **auth_data,
521 }})
522 # Post Final (2N+1)
523 post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
524 # Request Final
525 post_data.append({'ping': {'content': 'rf:0'}})
526
527 for api_url in self._COMMENT_API_ENDPOINTS:
528 comments = self._download_json(
529 api_url, video_id, data=json.dumps(post_data).encode(), fatal=False,
530 headers={
531 'Referer': 'https://www.nicovideo.jp/watch/%s' % video_id,
532 'Origin': 'https://www.nicovideo.jp',
533 'Content-Type': 'text/plain;charset=UTF-8',
534 },
535 note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
536 if comments:
537 return comments
538
a9bad429 539
f494ddad
LNO
class NiconicoPlaylistBaseIE(InfoExtractor):
    # Shared paging logic for list-like pages; subclasses supply `_call_api`.
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
        'X-Niconico-Language': 'en-us'
    }

    def _call_api(self, list_id, resource, query):
        "Implement this in child class"

    @staticmethod
    def _parse_owner(item):
        """Pick the uploader name and id out of an item's 'owner' entry."""
        owner_name = traverse_obj(item, ('owner', 'name'))
        owner_id = traverse_obj(item, ('owner', 'id'))
        return {
            'uploader': owner_name,
            'uploader_id': owner_id,
        }

    def _fetch_page(self, list_id, page):
        """Yield url-type entries for one page; `page` is zero-based."""
        api_page = page + 1
        resp = self._call_api(list_id, 'page %d' % api_page, {
            'page': api_page,
            'pageSize': self._PAGE_SIZE,
        })
        # Both the item and its nested 'video' are visited, which is needed
        # to support both mylist and user
        candidates = traverse_obj(resp, ('items', ..., ('video', None))) or []
        for video in candidates:
            video_id = video.get('id')
            if not video_id:
                # skip {"video": {"id": "blablabla", ...}}
                continue
            counts = video.get('count') or {}
            yield {
                '_type': 'url',
                'id': video_id,
                'title': video.get('title'),
                'url': f'https://www.nicovideo.jp/watch/{video_id}',
                'description': video.get('shortDescription'),
                'duration': int_or_none(video.get('duration')),
                'view_count': int_or_none(counts.get('view')),
                'comment_count': int_or_none(counts.get('comment')),
                'thumbnail': traverse_obj(video, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
                'ie_key': NiconicoIE.ie_key(),
                **self._parse_owner(video),
            }

    def _entries(self, list_id):
        """Return a lazily paged list of the entries of `list_id`."""
        fetch_page = functools.partial(self._fetch_page, list_id)
        return OnDemandPagedList(fetch_page, self._PAGE_SIZE)
590
591
class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
    # Extractor for "mylist" pages.
    IE_NAME = 'niconico:playlist'
    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
            'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
            'uploader': 'のっく',
            'uploader_id': '805442',
        },
        'playlist_mincount': 291,
    }, {
        'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
        'only_matching': True,
    }, {
        'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
        'only_matching': True,
    }]

    def _call_api(self, list_id, resource, query):
        """Query the mylist endpoint and return its 'mylist' payload."""
        response = self._download_json(
            f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
            f'Downloading {resource}', query=query,
            headers=self._API_HEADERS)
        return response['data']['mylist']

    def _real_extract(self, url):
        list_id = self._match_id(url)
        # A one-item page is enough to read the list's own metadata
        mylist = self._call_api(list_id, 'list', {'pageSize': 1})
        owner_fields = self._parse_owner(mylist)
        return self.playlist_result(
            self._entries(list_id), list_id,
            mylist.get('name'), mylist.get('description'), **owner_fields)
fb198a8a 628
fb198a8a 629
f494ddad
LNO
class NiconicoSeriesIE(InfoExtractor):
    # Extractor for series pages; entries are scraped from the page HTML.
    IE_NAME = 'niconico:series'
    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.nicovideo.jp/series/110226',
        'info_dict': {
            'id': '110226',
            'title': 'ご立派ァ!のシリーズ',
        },
        'playlist_mincount': 10,  # as of 2021/03/17
    }, {
        'url': 'https://www.nicovideo.jp/series/12312/',
        'info_dict': {
            'id': '12312',
            'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
        },
        'playlist_mincount': 97,  # as of 2021/03/17
    }, {
        'url': 'https://nico.ms/series/203559',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        list_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)

        # Try the <title> tag first, then the share button's text
        title_patterns = (
            r'<title>「(.+)(全',
            r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:',
        )
        title = self._search_regex(title_patterns, webpage, 'title', fatal=False)
        title = unescapeHTML(title) if title else title

        watch_ids = re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)
        entries = [
            self.url_result(f'https://www.nicovideo.jp/watch/{watch_id}', video_id=watch_id)
            for watch_id in watch_ids]
        return self.playlist_result(entries, list_id, title)
667
668
class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
    # Extractor for the logged-in user's watch history.
    IE_NAME = 'niconico:history'
    IE_DESC = 'NicoNico user history. Requires cookies.'
    _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/history'

    _TESTS = [{
        'note': 'PC page, with /video',
        'url': 'https://www.nicovideo.jp/my/history/video',
        'only_matching': True,
    }, {
        'note': 'PC page, without /video',
        'url': 'https://www.nicovideo.jp/my/history',
        'only_matching': True,
    }, {
        'note': 'mobile page, with /video',
        'url': 'https://sp.nicovideo.jp/my/history/video',
        'only_matching': True,
    }, {
        'note': 'mobile page, without /video',
        'url': 'https://sp.nicovideo.jp/my/history',
        'only_matching': True,
    }]

    def _call_api(self, list_id, resource, query):
        """Query the watch-history endpoint and return its 'data' payload."""
        response = self._download_json(
            'https://nvapi.nicovideo.jp/v1/users/me/watch/history', 'history',
            f'Downloading {resource}', query=query,
            headers=self._API_HEADERS)
        return response['data']

    def _real_extract(self, url):
        list_id = 'history'
        try:
            mylist = self._call_api(list_id, 'list', {'pageSize': 1})
        except ExtractorError as e:
            unauthorized = isinstance(e.cause, compat_HTTPError) and e.cause.code == 401
            if unauthorized:
                # HTTP 401 is reported as a login requirement
                self.raise_login_required('You have to be logged in to get your watch history')
            raise
        owner_fields = self._parse_owner(mylist)
        return self.playlist_result(self._entries(list_id), list_id, **owner_fields)
c1d3a4a8 709
710
class NicovideoSearchBaseIE(InfoExtractor):
    # Common scraping logic for search-like result pages. `_SEARCH_TYPE` is
    # the URL path segment used by `_search_results`.
    _SEARCH_TYPE = 'search'

    def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
        """Yield url results scraped from the result pages of `url`.

        `query` holds extra URL parameters. If it contains 'page', only that
        page is fetched; otherwise pages are fetched from 1 upwards until
        one comes back without results. `note` is a %-template that
        receives the page number as `page`.
        """
        # Copy so that writing 'page' below never alters the caller's dict
        query = dict(query or {})
        pages = [query['page']] if 'page' in query else itertools.count(1)
        for page_num in pages:
            query['page'] = str(page_num)
            webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
            results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
            for item in results:
                yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
            if not results:
                break

    def _search_results(self, query):
        """Return the entries for a search-key query."""
        return self._entries(
            self._proto_relative_url(f'//www.nicovideo.jp/{self._SEARCH_TYPE}/{query}'), query)
146cc411 729
63ccf4ff 730
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    # Search-key extractor; all behaviour comes from the two base classes.
    IE_DESC = 'Nico video search'
    IE_NAME = 'nicovideo:search'
    _SEARCH_KEY = 'nicosearch'
abafce59 735
63ccf4ff 736
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
    # Extractor for search result URLs.
    IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
    IE_DESC = 'Nico video search URLs'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
    _TESTS = [{
        'url': 'http://www.nicovideo.jp/search/sm9',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_mincount': 40,
    }, {
        'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_count': 31,
    }]

    def _real_extract(self, url):
        # The search term serves as both playlist id and playlist title
        search_term = self._match_id(url)
        entries = self._entries(url, search_term)
        return self.playlist_result(entries, search_term, search_term)
760
761
class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    # Search-key extractor that walks results by upload-date interval.
    IE_DESC = 'Nico video search, newest first'
    IE_NAME = f'{NicovideoSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'nicosearchdate'
    _TESTS = [{
        'url': 'nicosearchdateall:a',
        'info_dict': {
            'id': 'a',
            'title': 'a'
        },
        'playlist_mincount': 1610,
    }]

    _START_DATE = datetime.date(2007, 1, 1)
    _RESULTS_PER_PAGE = 32
    _MAX_PAGES = 50

    def _entries(self, url, item_id, start_date=None, end_date=None):
        """Yield results between `start_date` and `end_date`, splitting the
        interval in two whenever the last reachable page is full."""
        start_date = start_date or self._START_DATE
        end_date = end_date or datetime.datetime.now().date()

        # If the last page has a full page of videos, we need to break down the query interval further
        probe = self._get_entries_for_date(
            url, item_id, start_date, end_date, self._MAX_PAGES,
            note=f'Checking number of videos from {start_date} to {end_date}')
        last_page_len = sum(1 for _ in probe)

        can_split = start_date != end_date
        if not (last_page_len == self._RESULTS_PER_PAGE and can_split):
            self.to_screen(f'{item_id}: Downloading results from {start_date} to {end_date}')
            yield from self._get_entries_for_date(
                url, item_id, start_date, end_date, note=' Downloading page %(page)s')
            return

        # Newer half first
        midpoint = start_date + ((end_date - start_date) // 2)
        yield from self._entries(url, item_id, midpoint, end_date)
        yield from self._entries(url, item_id, start_date, midpoint)

    def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
        """Yield results for one date interval, optionally for one page only."""
        query = {
            'start': str(start_date),
            'end': str(end_date or start_date),
            'sort': 'f',
            'order': 'd',
        }
        if page_num:
            query['page'] = str(page_num)

        yield from super()._entries(url, item_id, query=query, note=note)
abafce59 806
807
9a5b0125
LNO
class NicovideoTagURLIE(NicovideoSearchBaseIE):
    # Extractor for tag listing URLs.
    IE_NAME = 'niconico:tag'
    IE_DESC = 'NicoNico video tag URLs'
    _SEARCH_TYPE = 'tag'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/tag/(?P<id>[^?#&]+)?'
    _TESTS = [{
        'url': 'https://www.nicovideo.jp/tag/ドキュメンタリー淫夢',
        'info_dict': {
            'id': 'ドキュメンタリー淫夢',
            'title': 'ドキュメンタリー淫夢'
        },
        'playlist_mincount': 400,
    }]

    def _real_extract(self, url):
        # The tag serves as both playlist id and playlist title
        tag = self._match_id(url)
        entries = self._entries(url, tag)
        return self.playlist_result(entries, tag, tag)
825
826
class NiconicoUserIE(InfoExtractor):
    # Extractor for a user's uploaded-videos listing.
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
    _TEST = {
        'url': 'https://www.nicovideo.jp/user/419948',
        'info_dict': {
            'id': '419948',
        },
        'playlist_mincount': 101,
    }
    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _entries(self, list_id):
        """Yield a url result for each of the user's videos, page by page.

        Paging stops once the advertised 'totalCount' has been yielded, or
        as soon as a page comes back empty. The latter covers a missing or
        overstated 'totalCount', which previously caused a TypeError or an
        endless loop respectively.
        """
        total_count = None
        count = 0
        for page_num in itertools.count(1):
            # The note names the page actually requested (shown from page 2 on)
            json_parsed = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num), list_id,
                headers=self._API_HEADERS,
                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num > 1 else ''))
            data = json_parsed['data']
            if page_num == 1:
                total_count = int_or_none(data.get('totalCount'))
            items = data.get('items') or []
            if not items:
                break
            for entry in items:
                count += 1
                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
            if total_count is not None and count >= total_count:
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())