# Source: jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/niconico.py
# Commit: [niconico] Rewrite NiconicoIE (#3018)
# Path: [yt-dlp.git] / yt_dlp / extractor / niconico.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import datetime
5 import functools
6 import itertools
7 import json
8 import re
9 import time
10
11 from .common import InfoExtractor, SearchInfoExtractor
12 from ..compat import (
13 compat_parse_qs,
14 compat_urllib_parse_urlparse,
15 compat_HTTPError,
16 )
17 from ..utils import (
18 ExtractorError,
19 OnDemandPagedList,
20 bug_reports_message,
21 clean_html,
22 float_or_none,
23 int_or_none,
24 join_nonempty,
25 parse_duration,
26 parse_filesize,
27 parse_iso8601,
28 remove_start,
29 traverse_obj,
30 try_get,
31 unescapeHTML,
32 update_url_query,
33 url_or_none,
34 urlencode_postdata,
35 )
36
37
class NiconicoIE(InfoExtractor):
    """Extractor for single Niconico watch pages (nicovideo.jp/watch, nico.ms).

    Formats are emitted with the pseudo-protocol ``niconico_dmc``; the real
    media URL is only resolved later by ``_get_heartbeat_info``, which opens
    a session on the session API endpoint listed in the watch page's API data.
    """

    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'd1a75c0823e2f629128c43e1212760f9',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'thumbnail': r're:https?://.*',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': int,  # timestamp is unstable
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
            'ext': 'swf',
            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
            'thumbnail': r're:https?://.*',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
            'duration': 209,
        },
        'skip': 'Requires an account',
    }, {
        # 'video exists but is marked as "deleted"
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'id': 'sm10000',
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'thumbnail': r're:https?://.*',
            'upload_date': '20071224',
            'timestamp': int,  # timestamp field has different value if logged in
            'duration': 304,
            'view_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'id': '1388129933',
            'ext': 'mp4',
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'thumbnail': r're:https?://.*',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
            'uploader_id': '312',
        },
        'skip': 'The viewing period of the video you were searching for has expired.',
    }, {
        # video not available via `getflv`; "old" HTML5 video
        'url': 'http://www.nicovideo.jp/watch/sm1151009',
        'md5': '8fa81c364eb619d4085354eab075598a',
        'info_dict': {
            'id': 'sm1151009',
            'ext': 'mp4',
            'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
            'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
            'thumbnail': r're:https?://.*',
            'duration': 184,
            'timestamp': 1190868283,
            'upload_date': '20070927',
            'uploader': 'denden2',
            'uploader_id': '1392194',
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        # "New" HTML5 video
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm31464864',
        'info_dict': {
            'id': 'sm31464864',
            'ext': 'mp4',
            'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
            'timestamp': 1498514060,
            'upload_date': '20170626',
            'uploader': 'ゲスト',
            'uploader_id': '40826363',
            'thumbnail': r're:https?://.*',
            'duration': 198,
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        # Video without owner
        'url': 'http://www.nicovideo.jp/watch/sm18238488',
        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
        'info_dict': {
            'id': 'sm18238488',
            'ext': 'mp4',
            'title': '【実写版】ミュータントタートルズ',
            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
            'timestamp': 1341160408,
            'upload_date': '20120701',
            'uploader': None,
            'uploader_id': None,
            'thumbnail': r're:https?://.*',
            'duration': 5271,
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
        'only_matching': True,
    }, {
        'note': 'a video that is only served as an ENCRYPTED HLS.',
        'url': 'https://www.nicovideo.jp/watch/so38016254',
        'only_matching': True,
    }]

    _VALID_URL = r'https?://(?:(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch|nico\.ms)/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'
    # Comment endpoints, tried in this order until one returns data.
    _COMMENT_API_ENDPOINTS = (
        'https://nvcomment.nicovideo.jp/legacy/api.json',
        'https://nmsg.nicovideo.jp/api.json',)
    # Headers sent with the nvapi "watch" tracking request.
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
        'X-Niconico-Language': 'en-us',
        'Referer': 'https://www.nicovideo.jp/',
        'Origin': 'https://www.nicovideo.jp',
    }

    def _real_initialize(self):
        """Log in (if credentials are configured) before any extraction."""
        self._login()

    def _login(self):
        """Log in with the configured credentials.

        Returns True when no credentials are configured or the login
        succeeded, False otherwise (a warning is reported in that case).
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        # Log in
        login_ok = True
        login_form_strs = {
            'mail_tel': username,
            'password': password,
        }
        self._request_webpage(
            'https://account.nicovideo.jp/login', None,
            note='Acquiring Login session')
        urlh = self._request_webpage(
            'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
            note='Logging in', errnote='Unable to log in',
            data=urlencode_postdata(login_form_strs),
            headers={
                'Referer': 'https://account.nicovideo.jp/login',
                'Content-Type': 'application/x-www-form-urlencoded',
            })
        if urlh is False:
            login_ok = False
        else:
            # A failed login redirects to a URL carrying message=cant_login.
            parts = compat_urllib_parse_urlparse(urlh.geturl())
            if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
                login_ok = False
        if not login_ok:
            self.report_warning('unable to log in: bad username or password')
        return login_ok

    def _get_heartbeat_info(self, info_dict):
        """Open a DMC session for a format and describe how to keep it alive.

        ``info_dict`` is a format dict as built by
        ``_extract_format_for_quality``: its ``url`` has the form
        ``niconico_dmc:<video_id>/<video_src_id>/<audio_src_id>`` and
        ``_expected_protocol`` is ``'http'`` or ``'hls'``.

        Returns a tuple ``(info_dict, heartbeat_info_dict)``. ``info_dict`` is
        updated in place with the real ``url`` and ``protocol``;
        ``heartbeat_info_dict`` holds the heartbeat ``url``, request ``data``,
        ``interval`` and a ``ping`` callable.

        Raises ExtractorError for any other DMC protocol.
        """
        video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
        dmc_protocol = info_dict['_expected_protocol']

        # Reuse the API data stored by _real_extract when present; otherwise
        # re-download the watch page and parse it again.
        api_data = (
            info_dict.get('_api_data')
            or self._parse_json(
                self._html_search_regex(
                    'data-api-data="([^"]+)"',
                    self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
                    'API data', default='{}'),
                video_id))

        session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
        session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])

        def ping():
            tracking_id = traverse_obj(api_data, ('media', 'delivery', 'trackingId'))
            if tracking_id:
                tracking_url = update_url_query('https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', {'t': tracking_id})
                watch_request_response = self._download_json(
                    tracking_url, video_id,
                    note='Acquiring permission for downloading video', fatal=False,
                    headers=self._API_HEADERS)
                if traverse_obj(watch_request_response, ('meta', 'status')) != 200:
                    self.report_warning('Failed to acquire permission for playing video. Video download may fail.')

        yesno = lambda x: 'yes' if x else 'no'

        if dmc_protocol == 'http':
            protocol = 'http'
            protocol_parameters = {
                'http_output_download_parameters': {
                    'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                    'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
                }
            }
        elif dmc_protocol == 'hls':
            protocol = 'm3u8'
            segment_duration = try_get(self._configuration_arg('segment_duration'), lambda x: int(x[0])) or 6000
            parsed_token = self._parse_json(session_api_data['token'], video_id)
            encryption = traverse_obj(api_data, ('media', 'delivery', 'encryption'))
            protocol_parameters = {
                'hls_parameters': {
                    'segment_duration': segment_duration,
                    'transfer_preset': '',
                    'use_ssl': yesno(session_api_data['urls'][0]['isSsl']),
                    'use_well_known_port': yesno(session_api_data['urls'][0]['isWellKnownPort']),
                }
            }
            if 'hls_encryption' in parsed_token and encryption:
                # Encrypted HLS: keep protocol 'm3u8' and pass the key info on.
                protocol_parameters['hls_parameters']['encryption'] = {
                    parsed_token['hls_encryption']: {
                        'encrypted_key': encryption['encryptedKey'],
                        'key_uri': encryption['keyUri'],
                    }
                }
            else:
                protocol = 'm3u8_native'
        else:
            raise ExtractorError(f'Unsupported DMC protocol: {dmc_protocol}')

        session_response = self._download_json(
            session_api_endpoint['url'], video_id,
            query={'_format': 'json'},
            headers={'Content-Type': 'application/json'},
            note='Downloading JSON metadata for %s' % info_dict['format_id'],
            data=json.dumps({
                'session': {
                    'client_info': {
                        'player_id': session_api_data.get('playerId'),
                    },
                    'content_auth': {
                        'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
                        'content_key_timeout': session_api_data.get('contentKeyTimeout'),
                        'service_id': 'nicovideo',
                        'service_user_id': session_api_data.get('serviceUserId')
                    },
                    'content_id': session_api_data.get('contentId'),
                    'content_src_id_sets': [{
                        'content_src_ids': [{
                            'src_id_to_mux': {
                                'audio_src_ids': [audio_src_id],
                                'video_src_ids': [video_src_id],
                            }
                        }]
                    }],
                    'content_type': 'movie',
                    'content_uri': '',
                    'keep_method': {
                        'heartbeat': {
                            'lifetime': session_api_data.get('heartbeatLifetime')
                        }
                    },
                    'priority': session_api_data['priority'],
                    'protocol': {
                        'name': 'http',
                        'parameters': {
                            'http_parameters': {
                                'parameters': protocol_parameters
                            }
                        }
                    },
                    'recipe_id': session_api_data.get('recipeId'),
                    'session_operation_auth': {
                        'session_operation_auth_by_signature': {
                            'signature': session_api_data.get('signature'),
                            'token': session_api_data.get('token'),
                        }
                    },
                    'timing_constraint': 'unlimited'
                }
            }).encode())

        info_dict['url'] = session_response['data']['session']['content_uri']
        info_dict['protocol'] = protocol

        # get heartbeat info
        heartbeat_info_dict = {
            'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
            'data': json.dumps(session_response['data']),
            # interval: the lifetime is scaled by 3000, i.e. (assuming it is in
            # milliseconds) converted to seconds and cut to a third as a buffer.
            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
            'ping': ping
        }

        return info_dict, heartbeat_info_dict

    def _extract_format_for_quality(self, video_id, audio_quality, video_quality, dmc_protocol):
        """Build one format dict for an audio/video quality pair and protocol.

        Returns None when either quality is not marked ``isAvailable``.
        """

        if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
            return None

        def extract_video_quality(video_quality):
            # Fall back to the bitrate embedded in the quality label. The
            # label may be missing, so never hand None to the regex search.
            return parse_filesize('%sB' % self._search_regex(
                r'\| ([0-9]*\.?[0-9]*[MK])', video_quality or '', 'vbr', default=''))

        format_id = '-'.join(
            [remove_start(s['id'], 'archive_') for s in (video_quality, audio_quality)] + [dmc_protocol])

        vid_qual_label = traverse_obj(video_quality, ('metadata', 'label'))
        vid_quality = traverse_obj(video_quality, ('metadata', 'bitrate'))

        # The bitrate can be absent (None); comparing None with 0 would raise
        # TypeError, so only trust it when it is a positive number.
        has_bitrate = isinstance(vid_quality, (int, float)) and vid_quality > 0

        return {
            'url': 'niconico_dmc:%s/%s/%s' % (video_id, video_quality['id'], audio_quality['id']),
            'format_id': format_id,
            'format_note': join_nonempty('DMC', vid_qual_label, dmc_protocol.upper(), delim=' '),
            'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
            'acodec': 'aac',
            'vcodec': 'h264',
            'abr': float_or_none(traverse_obj(audio_quality, ('metadata', 'bitrate')), 1000),
            'vbr': float_or_none(vid_quality if has_bitrate else extract_video_quality(vid_qual_label), 1000),
            'height': traverse_obj(video_quality, ('metadata', 'resolution', 'height')),
            'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
            'quality': -2 if 'low' in video_quality['id'] else None,
            'protocol': 'niconico_dmc',
            '_expected_protocol': dmc_protocol,
            'http_headers': {
                'Origin': 'https://www.nicovideo.jp',
                'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
            }
        }

    def _real_extract(self, url):
        """Extract metadata, formats and comments for one watch URL.

        The watch page is tried first; if that fails, the watch v3 API is
        used instead. If both fail and the first failure was an HTTP error
        whose body contains a known error section, that message is raised as
        an expected ExtractorError.
        """
        video_id = self._match_id(url)

        # Must be bound on every path: when the page download fails but the
        # API fallback succeeds, the code below still reads ``webpage``. An
        # empty string makes all page-based lookups fall through to defaults.
        webpage = ''
        try:
            webpage, handle = self._download_webpage_handle(
                'http://www.nicovideo.jp/watch/' + video_id, video_id)
            if video_id.startswith('so'):
                # Re-derive the id from the final URL of the response.
                video_id = self._match_id(handle.geturl())

            api_data = self._parse_json(self._html_search_regex(
                'data-api-data="([^"]+)"', webpage,
                'API data', default='{}'), video_id)
        except ExtractorError as e:
            try:
                api_data = self._download_json(
                    'https://www.nicovideo.jp/api/watch/v3/%s?_frontendId=6&_frontendVersion=0&actionTrackId=AAAAAAAAAA_%d' % (video_id, round(time.time() * 1000)), video_id,
                    note='Downloading API JSON', errnote='Unable to fetch data')['data']
            except ExtractorError:
                if not isinstance(e.cause, compat_HTTPError):
                    raise
                webpage = e.cause.read().decode('utf-8', 'replace')
                error_msg = self._html_search_regex(
                    r'(?s)<section\s+class="(?:(?:ErrorMessage|WatchExceptionPage-message)\s*)+">(.+?)</section>',
                    webpage, 'error reason', default=None)
                if not error_msg:
                    raise
                raise ExtractorError(re.sub(r'\s+', ' ', error_msg), expected=True)

        formats = []

        def get_video_info(*items, get_first=True, **kwargs):
            return traverse_obj(api_data, ('video', *items), get_all=not get_first, **kwargs)

        quality_info = api_data['media']['delivery']['movie']
        session_api_data = quality_info['session']
        # One candidate format per (audio, video, protocol) combination.
        for (audio_quality, video_quality, protocol) in itertools.product(quality_info['audios'], quality_info['videos'], session_api_data['protocols']):
            fmt = self._extract_format_for_quality(video_id, audio_quality, video_quality, protocol)
            if fmt:
                formats.append(fmt)

        self._sort_formats(formats)

        # Start extracting information
        tags = None
        if webpage:
            # use og:video:tag (not logged in)
            og_video_tags = re.finditer(r'<meta\s+property="og:video:tag"\s*content="(.*?)">', webpage)
            tags = list(filter(None, (clean_html(x.group(1)) for x in og_video_tags)))
            if not tags:
                # use keywords and split with comma (not logged in)
                kwds = self._html_search_meta('keywords', webpage, default=None)
                if kwds:
                    tags = [x for x in kwds.split(',') if x]
        if not tags:
            # find in json (logged in)
            tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))

        return {
            'id': video_id,
            '_api_data': api_data,
            'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
            'formats': formats,
            'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta(
                ('image', 'og:image'), webpage, 'thumbnail', default=None),
            'description': clean_html(get_video_info('description')),
            'uploader': traverse_obj(api_data, ('owner', 'nickname')),
            'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
                self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
            'uploader_id': traverse_obj(api_data, ('owner', 'id')),
            'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
            'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
            'view_count': int_or_none(get_video_info('count', 'view')),
            'tags': tags,
            'genre': traverse_obj(api_data, ('genre', 'label'), ('genre', 'key')),
            'comment_count': get_video_info('count', 'comment', expected_type=int),
            'duration': (
                parse_duration(self._html_search_meta('video:duration', webpage, 'video duration', default=None))
                or get_video_info('duration')),
            'webpage_url': url_or_none(url) or f'https://www.nicovideo.jp/watch/{video_id}',
            'subtitles': self.extract_subtitles(video_id, api_data, session_api_data),
        }

    def _get_subtitles(self, video_id, api_data, session_api_data):
        """Download the comments of all active threads as a 'comments' track.

        Returns a subtitles dict with a single JSON entry, or None (after a
        warning) when no comments could be fetched.
        """
        comment_user_key = traverse_obj(api_data, ('comment', 'keys', 'userKey'))
        user_id_str = session_api_data.get('serviceUserId')

        thread_ids = [x for x in traverse_obj(api_data, ('comment', 'threads')) or [] if x['isActive']]
        raw_danmaku = self._extract_all_comments(video_id, thread_ids, user_id_str, comment_user_key)
        if not raw_danmaku:
            self.report_warning(f'Failed to get comments. {bug_reports_message()}')
            return
        return {
            'comments': [{
                'ext': 'json',
                'data': json.dumps(raw_danmaku),
            }],
        }

    def _extract_all_comments(self, video_id, threads, user_id, user_key):
        """Request the comments of ``threads`` from the comment endpoints.

        One batched request is built (a ``thread`` and a ``thread_leaves``
        post per thread, each wrapped in ping markers) and sent to each entry
        of ``_COMMENT_API_ENDPOINTS`` in turn. Returns the first non-empty
        response, or None if every endpoint fails.
        """
        auth_data = {
            'user_id': user_id,
            'userkey': user_key,
        } if user_id and user_key else {'user_id': ''}

        # Request Start
        post_data = [{'ping': {'content': 'rs:0'}}]
        for i, thread in enumerate(threads):
            thread_id = thread['id']
            thread_fork = thread['fork']
            # Post Start (2N)
            post_data.append({'ping': {'content': f'ps:{i * 2}'}})
            post_data.append({'thread': {
                'fork': thread_fork,
                'language': 0,
                'nicoru': 3,
                'scores': 1,
                'thread': thread_id,
                'version': '20090904',
                'with_global': 1,
                **auth_data,
            }})
            # Post Final (2N)
            post_data.append({'ping': {'content': f'pf:{i * 2}'}})

            # Post Start (2N+1)
            post_data.append({'ping': {'content': f'ps:{i * 2 + 1}'}})
            post_data.append({'thread_leaves': {
                # format is '<bottom of minute range>-<top of minute range>:<comments per minute>,<total last comments>'
                # unfortunately NND limits (deletes?) comment returns this way, so you're only able to grab the last 1000 per language
                'content': '0-999999:999999,999999,nicoru:999999',
                'fork': thread_fork,
                'language': 0,
                'nicoru': 3,
                'scores': 1,
                'thread': thread_id,
                **auth_data,
            }})
            # Post Final (2N+1)
            post_data.append({'ping': {'content': f'pf:{i * 2 + 1}'}})
        # Request Final
        post_data.append({'ping': {'content': 'rf:0'}})

        for api_url in self._COMMENT_API_ENDPOINTS:
            comments = self._download_json(
                api_url, video_id, data=json.dumps(post_data).encode(), fatal=False,
                headers={
                    'Referer': 'https://www.nicovideo.jp/watch/%s' % video_id,
                    'Origin': 'https://www.nicovideo.jp',
                    'Content-Type': 'text/plain;charset=UTF-8',
                },
                note='Downloading comments', errnote=f'Failed to access endpoint {api_url}')
            if comments:
                return comments
538
539
class NiconicoPlaylistBaseIE(InfoExtractor):
    """Shared paging logic for list-like Niconico resources.

    Subclasses provide ``_call_api``; this base turns each API page into
    ``url``-type entries pointing at the individual watch pages.
    """

    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0',
        'X-Niconico-Language': 'en-us'
    }

    def _call_api(self, list_id, resource, query):
        """Fetch one API response for the list; overridden by child classes."""
        pass

    @staticmethod
    def _parse_owner(item):
        """Map the ``owner`` object of ``item`` to uploader fields."""
        owner_name = traverse_obj(item, ('owner', 'name'))
        owner_id = traverse_obj(item, ('owner', 'id'))
        return {
            'uploader': owner_name,
            'uploader_id': owner_id,
        }

    def _fetch_page(self, list_id, page):
        """Yield the entries of the zero-based ``page`` of the list."""
        page_number = page + 1  # the API is queried with one-based pages
        response = self._call_api(list_id, 'page %d' % page_number, {
            'page': page_number,
            'pageSize': self._PAGE_SIZE,
        })
        # this is needed to support both mylist and user: each item is
        # looked at both through its 'video' member and as the item itself
        candidates = traverse_obj(response, ('items', ..., ('video', None))) or []
        for candidate in candidates:
            candidate_id = candidate.get('id')
            if not candidate_id:
                # skip {"video": {"id": "blablabla", ...}}
                continue
            counters = candidate.get('count') or {}
            entry = {
                '_type': 'url',
                'id': candidate_id,
                'title': candidate.get('title'),
                'url': f'https://www.nicovideo.jp/watch/{candidate_id}',
                'description': candidate.get('shortDescription'),
                'duration': int_or_none(candidate.get('duration')),
                'view_count': int_or_none(counters.get('view')),
                'comment_count': int_or_none(counters.get('comment')),
                'thumbnail': traverse_obj(candidate, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
                'ie_key': NiconicoIE.ie_key(),
            }
            entry.update(self._parse_owner(candidate))
            yield entry

    def _entries(self, list_id):
        """Return a lazily paged list of all entries of ``list_id``."""
        page_fetcher = functools.partial(self._fetch_page, list_id)
        return OnDemandPagedList(page_fetcher, self._PAGE_SIZE)
590
591
class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
    """Extractor for Niconico "mylist" playlists."""

    IE_NAME = 'niconico:playlist'
    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
            'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
            'uploader': 'のっく',
            'uploader_id': '805442',
        },
        'playlist_mincount': 291,
    }, {
        'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
        'only_matching': True,
    }, {
        'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
        'only_matching': True,
    }]

    def _call_api(self, list_id, resource, query):
        """Query the v2 mylists API and return its ``data.mylist`` object."""
        response = self._download_json(
            f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
            f'Downloading {resource}', query=query,
            headers=self._API_HEADERS)
        return response['data']['mylist']

    def _real_extract(self, url):
        """Build the playlist: list metadata first, entries paged lazily."""
        list_id = self._match_id(url)
        # A one-item page is enough to read the list's own metadata.
        metadata = self._call_api(list_id, 'list', {'pageSize': 1})
        owner_fields = self._parse_owner(metadata)
        return self.playlist_result(
            self._entries(list_id), list_id,
            metadata.get('name'), metadata.get('description'), **owner_fields)
628
629
class NiconicoSeriesIE(InfoExtractor):
    """Extractor for Niconico series pages (a list of watch pages)."""

    IE_NAME = 'niconico:series'
    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.nicovideo.jp/series/110226',
        'info_dict': {
            'id': '110226',
            'title': 'ご立派ァ!のシリーズ',
        },
        'playlist_mincount': 10,  # as of 2021/03/17
    }, {
        'url': 'https://www.nicovideo.jp/series/12312/',
        'info_dict': {
            'id': '12312',
            'title': 'バトルスピリッツ お勧めカード紹介(調整中)',
        },
        'playlist_mincount': 97,  # as of 2021/03/17
    }, {
        'url': 'https://nico.ms/series/203559',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Scrape the series page for its title and the linked watch ids."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)

        title = self._search_regex(
            # \uff08 is the fullwidth left parenthesis that closes the title
            # in "<title>「NAME(全...". It is written as an escape because a
            # bare ASCII "(" here would open an unterminated group and make
            # the pattern impossible to compile.
            (r'<title>「(.+)\uff08全',
             r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
            webpage, 'title', fatal=False)
        if title:
            title = unescapeHTML(title)
        playlist = [
            self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
            for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
        return self.playlist_result(playlist, list_id, title)
667
668
class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
    """Extractor for the logged-in user's watch history."""

    IE_NAME = 'niconico:history'
    IE_DESC = 'NicoNico user history. Requires cookies.'
    _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/history'

    _TESTS = [{
        'note': 'PC page, with /video',
        'url': 'https://www.nicovideo.jp/my/history/video',
        'only_matching': True,
    }, {
        'note': 'PC page, without /video',
        'url': 'https://www.nicovideo.jp/my/history',
        'only_matching': True,
    }, {
        'note': 'mobile page, with /video',
        'url': 'https://sp.nicovideo.jp/my/history/video',
        'only_matching': True,
    }, {
        'note': 'mobile page, without /video',
        'url': 'https://sp.nicovideo.jp/my/history',
        'only_matching': True,
    }]

    def _call_api(self, list_id, resource, query):
        """Query the watch-history API and return its ``data`` object."""
        response = self._download_json(
            'https://nvapi.nicovideo.jp/v1/users/me/watch/history', 'history',
            f'Downloading {resource}', query=query,
            headers=self._API_HEADERS)
        return response['data']

    def _real_extract(self, url):
        """Return the history as a playlist; HTTP 401 means login is needed."""
        list_id = 'history'
        try:
            history_head = self._call_api(list_id, 'list', {'pageSize': 1})
        except ExtractorError as error:
            cause = error.cause
            unauthorized = isinstance(cause, compat_HTTPError) and cause.code == 401
            if unauthorized:
                self.raise_login_required('You have to be logged in to get your watch history')
            raise
        owner_fields = self._parse_owner(history_head)
        return self.playlist_result(self._entries(list_id), list_id, **owner_fields)
709
710
class NicovideoSearchBaseIE(InfoExtractor):
    """Shared scraping logic for Niconico search and tag result pages."""

    # Path component used to build the result URL in _search_results.
    _SEARCH_TYPE = 'search'

    def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
        """Yield url results for every video id found on the result pages.

        If ``query`` already contains ``page`` only that page is fetched;
        otherwise pages are fetched from 1 upwards until one has no results.
        ``note`` is %-formatted with a ``page`` key for each download.
        """
        # Work on a copy: the loop below writes 'page' into the dict and must
        # not modify the mapping owned by the caller.
        query = dict(query or {})
        pages = [query['page']] if 'page' in query else itertools.count(1)
        for page_num in pages:
            query['page'] = str(page_num)
            webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
            results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
            for item in results:
                yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
            if not results:
                break

    def _search_results(self, query):
        """Return the entries for the search term ``query``."""
        return self._entries(
            self._proto_relative_url(f'//www.nicovideo.jp/{self._SEARCH_TYPE}/{query}'), query)
729
730
class NicovideoSearchIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Keyword search over Niconico, selected with the ``nicosearch`` key."""

    IE_NAME = 'nicovideo:search'
    IE_DESC = 'Nico video search'
    _SEARCH_KEY = 'nicosearch'
735
736
class NicovideoSearchURLIE(NicovideoSearchBaseIE):
    """Extractor for nicovideo.jp/search/<term> result URLs."""

    IE_NAME = f'{NicovideoSearchIE.IE_NAME}_url'
    IE_DESC = 'Nico video search URLs'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/search/(?P<id>[^?#&]+)?'
    _TESTS = [{
        'url': 'http://www.nicovideo.jp/search/sm9',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_mincount': 40,
    }, {
        'url': 'https://www.nicovideo.jp/search/sm9?sort=h&order=d&end=2020-12-31&start=2020-01-01',
        'info_dict': {
            'id': 'sm9',
            'title': 'sm9'
        },
        'playlist_count': 31,
    }]

    def _real_extract(self, url):
        """Return a playlist whose id and title are both the search term."""
        search_term = self._match_id(url)
        found = self._entries(url, search_term)
        return self.playlist_result(found, search_term, search_term)
760
761
class NicovideoSearchDateIE(NicovideoSearchBaseIE, SearchInfoExtractor):
    """Search sorted newest first, split into date ranges.

    A date range is halved repeatedly while its page number ``_MAX_PAGES``
    still comes back full, i.e. while the range may hold more results than
    can be paged through.
    """

    IE_DESC = 'Nico video search, newest first'
    IE_NAME = f'{NicovideoSearchIE.IE_NAME}:date'
    _SEARCH_KEY = 'nicosearchdate'
    _TESTS = [{
        'url': 'nicosearchdateall:a',
        'info_dict': {
            'id': 'a',
            'title': 'a'
        },
        'playlist_mincount': 1610,
    }]

    _START_DATE = datetime.date(2007, 1, 1)
    _RESULTS_PER_PAGE = 32
    _MAX_PAGES = 50

    def _entries(self, url, item_id, start_date=None, end_date=None):
        """Yield all results between ``start_date`` and ``end_date``.

        Defaults are ``_START_DATE`` and today. The newer half of a split
        range is yielded before the older half.
        """
        start_date, end_date = start_date or self._START_DATE, end_date or datetime.datetime.now().date()

        # If the last page has a full page of videos, we need to break down the query interval further
        last_page_len = len(list(self._get_entries_for_date(
            url, item_id, start_date, end_date, self._MAX_PAGES,
            note=f'Checking number of videos from {start_date} to {end_date}')))
        if (last_page_len == self._RESULTS_PER_PAGE and start_date != end_date):
            midpoint = start_date + ((end_date - start_date) // 2)
            # The newer half starts the day AFTER the midpoint. Starting it
            # at the midpoint itself repeats the very same range when the two
            # dates are one day apart (midpoint == start_date), which recursed
            # without end; it also listed the midpoint day in both halves
            # (the single-day query below sends start == end, so both bounds
            # are presumably inclusive).
            yield from self._entries(url, item_id, midpoint + datetime.timedelta(days=1), end_date)
            yield from self._entries(url, item_id, start_date, midpoint)
        else:
            self.to_screen(f'{item_id}: Downloading results from {start_date} to {end_date}')
            yield from self._get_entries_for_date(
                url, item_id, start_date, end_date, note=' Downloading page %(page)s')

    def _get_entries_for_date(self, url, item_id, start_date, end_date=None, page_num=None, note=None):
        """Yield results for one date range, optionally for one page only.

        ``end_date`` defaults to ``start_date``. Callers are expected to pass
        ``note``, since the base class %-formats it.
        """
        query = {
            'start': str(start_date),
            'end': str(end_date or start_date),
            'sort': 'f',
            'order': 'd',
        }
        if page_num:
            query['page'] = str(page_num)

        yield from super()._entries(url, item_id, query=query, note=note)
806
807
class NicovideoTagURLIE(NicovideoSearchBaseIE):
    """Extractor for nicovideo.jp/tag/<tag> result URLs."""

    IE_NAME = 'niconico:tag'
    IE_DESC = 'NicoNico video tag URLs'
    _SEARCH_TYPE = 'tag'
    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/tag/(?P<id>[^?#&]+)?'
    _TESTS = [{
        'url': 'https://www.nicovideo.jp/tag/ドキュメンタリー淫夢',
        'info_dict': {
            'id': 'ドキュメンタリー淫夢',
            'title': 'ドキュメンタリー淫夢'
        },
        'playlist_mincount': 400,
    }]

    def _real_extract(self, url):
        """Return a playlist whose id and title are both the tag."""
        tag = self._match_id(url)
        found = self._entries(url, tag)
        return self.playlist_result(found, tag, tag)
825
826
class NiconicoUserIE(InfoExtractor):
    """Extractor for all videos listed for a Niconico user page."""

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
    _TEST = {
        'url': 'https://www.nicovideo.jp/user/419948',
        'info_dict': {
            'id': '419948',
        },
        'playlist_mincount': 101,
    }
    # Filled with (user id, page size, one-based page number).
    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _entries(self, list_id):
        """Yield a url result for every video of user ``list_id``.

        Pages are fetched until ``totalCount`` (read from the first page)
        entries were seen, or until a page comes back without items.
        """
        total_count = 1
        count = page_num = 0
        # total_count can become None when the first page lacks 'totalCount';
        # in that case keep paging until an empty page ends the loop instead
        # of failing on the ``count < None`` comparison.
        while total_count is None or count < total_count:
            json_parsed = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
                headers=self._API_HEADERS,
                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
            if not page_num:
                total_count = int_or_none(json_parsed['data'].get('totalCount'))
            items = json_parsed["data"]["items"]
            if not items:
                # An empty page would never advance ``count``; stop rather
                # than requesting further pages forever.
                break
            for entry in items:
                count += 1
                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
            page_num += 1

    def _real_extract(self, url):
        """Return the user's videos as a playlist handled by NiconicoIE."""
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())