jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/niconico.py
Allow multiple `--exec` and `--exec-before-download`
[yt-dlp.git] / yt_dlp / extractor / niconico.py
CommitLineData
dcdb292f 1# coding: utf-8
214c22c7 2from __future__ import unicode_literals
52ad14ae 3
fb198a8a 4import re
29f7c58a 5import json
fb198a8a 6import datetime
52ad14ae
TT
7
8from .common import InfoExtractor
fb198a8a 9from ..postprocessor.ffmpeg import FFmpegPostProcessor
1cc79574 10from ..compat import (
7978e172 11 compat_str,
bb139491 12 compat_parse_qs,
29f7c58a 13 compat_urllib_parse_urlparse,
1cc79574
PH
14)
15from ..utils import (
463e7216 16 dict_get,
6110bbbf 17 ExtractorError,
29f7c58a 18 int_or_none,
fb198a8a 19 float_or_none,
20 OnDemandPagedList,
1cc79574 21 parse_duration,
bb865f3a 22 parse_iso8601,
fb198a8a 23 PostProcessingError,
7978e172 24 str_or_none,
ee6a6116 25 remove_start,
463e7216
YCH
26 try_get,
27 unified_timestamp,
6e6bc8da 28 urlencode_postdata,
bb139491 29 xpath_text,
52ad14ae
TT
30)
31
13ebea79 32
class NiconicoIE(InfoExtractor):
    """Extractor for single videos on nicovideo.jp watch pages.

    Formats come from two places: the "DMC" session API (one format per
    available audio/video quality pair, resolved lazily through
    ``_get_heartbeat_info``) and, when the page exposes it, the legacy
    "smile" direct URL, which is probed with ffprobe.
    """

    IE_NAME = 'niconico'
    IE_DESC = 'ニコニコ動画'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        # NOTE: a stray trailing "s" made this digest 33 characters long,
        # which can never match an MD5 hex digest.
        'md5': 'a5bad06f1347452102953f323c69da34',
        'info_dict': {
            'id': 'sm22312215',
            'ext': 'mp4',
            'title': 'Big Buck Bunny',
            'thumbnail': r're:https?://.*',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': int,  # timestamp is unstable
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
            'duration': 33,
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 field
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'id': 'nm14296458',
            'ext': 'swf',
            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
            'thumbnail': r're:https?://.*',
            'uploader': 'りょうた',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
            'duration': 209,
        },
        'skip': 'Requires an account',
    }, {
        # video exists but is marked as "deleted"
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'id': 'sm10000',
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'thumbnail': r're:https?://.*',
            'upload_date': '20071224',
            'timestamp': int,  # timestamp field has different value if logged in
            'duration': 304,
            'view_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'id': '1388129933',
            'ext': 'mp4',
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'thumbnail': r're:https?://.*',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
            'uploader_id': '312',
        },
        'skip': 'The viewing period of the video you were searching for has expired.',
    }, {
        # video not available via `getflv`; "old" HTML5 video
        'url': 'http://www.nicovideo.jp/watch/sm1151009',
        'md5': '8fa81c364eb619d4085354eab075598a',
        'info_dict': {
            'id': 'sm1151009',
            'ext': 'mp4',
            'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
            'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
            'thumbnail': r're:https?://.*',
            'duration': 184,
            'timestamp': 1190868283,
            'upload_date': '20070927',
            'uploader': 'denden2',
            'uploader_id': '1392194',
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        # "New" HTML5 video
        # md5 is unstable
        'url': 'http://www.nicovideo.jp/watch/sm31464864',
        'info_dict': {
            'id': 'sm31464864',
            'ext': 'mp4',
            'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
            'timestamp': 1498514060,
            'upload_date': '20170626',
            'uploader': 'ゲスト',
            'uploader_id': '40826363',
            'thumbnail': r're:https?://.*',
            'duration': 198,
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        # Video without owner
        'url': 'http://www.nicovideo.jp/watch/sm18238488',
        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
        'info_dict': {
            'id': 'sm18238488',
            'ext': 'mp4',
            'title': '【実写版】ミュータントタートルズ',
            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
            'timestamp': 1341160408,
            'upload_date': '20120701',
            'uploader': None,
            'uploader_id': None,
            'thumbnail': r're:https?://.*',
            'duration': 5271,
            'view_count': int,
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
        'only_matching': True,
    }]

    _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'

    # Headers sent with every nvapi.nicovideo.jp request made by this class.
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _real_initialize(self):
        """Attempt to log in once before any extraction happens."""
        self._login()

    def _login(self):
        """Log in with the configured credentials, if any.

        Returns True when no credentials are configured or the login
        succeeded, and False (after emitting a warning) when the login
        request failed or the redirect URL carries ``message=cant_login``.
        """
        username, password = self._get_login_info()
        # No authentication to be performed
        if not username:
            return True

        # Log in
        login_ok = True
        login_form_strs = {
            'mail_tel': username,
            'password': password,
        }
        urlh = self._request_webpage(
            'https://account.nicovideo.jp/api/v1/login', None,
            note='Logging in', errnote='Unable to log in',
            data=urlencode_postdata(login_form_strs))
        if urlh is False:
            login_ok = False
        else:
            # A failed login is signalled through the query string of the
            # final (post-redirect) URL rather than through the HTTP status.
            parts = compat_urllib_parse_urlparse(urlh.geturl())
            if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
                login_ok = False
        if not login_ok:
            self.report_warning('unable to log in: bad username or password')
        return login_ok

    def _get_heartbeat_info(self, info_dict):
        """Open a DMC session for a format and describe how to keep it alive.

        ``info_dict['url']`` must be a pseudo-URL of the form
        ``<protocol>:<video_id>/<video_src_id>/<audio_src_id>`` as produced by
        ``_extract_format_for_quality``.  The method replaces
        ``info_dict['url']`` and ``info_dict['protocol']`` with the real
        content URI and transport, and returns ``(info_dict,
        heartbeat_info_dict)`` where the second dict carries the heartbeat
        ``url``, request ``data``, ``interval`` and a ``ping`` callable.

        Raises ExtractorError when the page's API data contains no session
        endpoint.
        """
        video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')

        # Reuse the API data captured during extraction when available;
        # otherwise re-download the watch page and parse it again.
        api_data = (
            info_dict.get('_api_data')
            or self._parse_json(
                self._html_search_regex(
                    'data-api-data="([^"]+)"',
                    self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
                    'API data', default='{}'),
                video_id))

        session_api_data = try_get(api_data, lambda x: x['media']['delivery']['movie']['session'])
        session_api_endpoint = try_get(session_api_data, lambda x: x['urls'][0])
        if not session_api_endpoint:
            # Without this guard the code below fails with an opaque
            # "'NoneType' object is not subscriptable" TypeError.
            raise ExtractorError('Unable to find session API data for %s' % video_id)

        def ping():
            """Request watch permission; warn if the API does not report status 200."""
            status = try_get(
                self._download_json(
                    'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id,
                    query={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])},
                    note='Acquiring permission for downloading video',
                    headers=self._API_HEADERS),
                lambda x: x['meta']['status'])
            if status != 200:
                self.report_warning('Failed to acquire permission for playing video. The video may not download.')

        yesno = lambda x: 'yes' if x else 'no'

        # m3u8 (encryption)
        if try_get(api_data, lambda x: x['media']['delivery']['encryption']) is not None:
            protocol = 'm3u8'
            # The session token is itself a JSON document naming the
            # encryption scheme, which is used as the key of the parameters.
            encryption = self._parse_json(session_api_data['token'], video_id)['hls_encryption']
            session_api_http_parameters = {
                'parameters': {
                    'hls_parameters': {
                        'encryption': {
                            encryption: {
                                'encrypted_key': try_get(api_data, lambda x: x['media']['delivery']['encryption']['encryptedKey']),
                                'key_uri': try_get(api_data, lambda x: x['media']['delivery']['encryption']['keyUri'])
                            }
                        },
                        'transfer_preset': '',
                        'use_ssl': yesno(session_api_endpoint['isSsl']),
                        'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
                        'segment_duration': 6000,
                    }
                }
            }
        # http
        else:
            protocol = 'http'
            session_api_http_parameters = {
                'parameters': {
                    'http_output_download_parameters': {
                        'use_ssl': yesno(session_api_endpoint['isSsl']),
                        'use_well_known_port': yesno(session_api_endpoint['isWellKnownPort']),
                    }
                }
            }

        session_response = self._download_json(
            session_api_endpoint['url'], video_id,
            query={'_format': 'json'},
            headers={'Content-Type': 'application/json'},
            note='Downloading JSON metadata for %s' % info_dict['format_id'],
            data=json.dumps({
                'session': {
                    'client_info': {
                        'player_id': session_api_data.get('playerId'),
                    },
                    'content_auth': {
                        'auth_type': try_get(session_api_data, lambda x: x['authTypes'][session_api_data['protocols'][0]]),
                        'content_key_timeout': session_api_data.get('contentKeyTimeout'),
                        'service_id': 'nicovideo',
                        'service_user_id': session_api_data.get('serviceUserId')
                    },
                    'content_id': session_api_data.get('contentId'),
                    'content_src_id_sets': [{
                        'content_src_ids': [{
                            'src_id_to_mux': {
                                'audio_src_ids': [audio_src_id],
                                'video_src_ids': [video_src_id],
                            }
                        }]
                    }],
                    'content_type': 'movie',
                    'content_uri': '',
                    'keep_method': {
                        'heartbeat': {
                            'lifetime': session_api_data.get('heartbeatLifetime')
                        }
                    },
                    'priority': session_api_data.get('priority'),
                    'protocol': {
                        'name': 'http',
                        'parameters': {
                            'http_parameters': session_api_http_parameters
                        }
                    },
                    'recipe_id': session_api_data.get('recipeId'),
                    'session_operation_auth': {
                        'session_operation_auth_by_signature': {
                            'signature': session_api_data.get('signature'),
                            'token': session_api_data.get('token'),
                        }
                    },
                    'timing_constraint': 'unlimited'
                }
            }).encode())

        info_dict['url'] = session_response['data']['session']['content_uri']
        info_dict['protocol'] = protocol

        # get heartbeat info
        heartbeat_info_dict = {
            'url': session_api_endpoint['url'] + '/' + session_response['data']['session']['id'] + '?_format=json&_method=PUT',
            'data': json.dumps(session_response['data']),
            # scale=3000 divides the lifetime by 3000: presumably the lifetime
            # is in milliseconds, so this yields one third of it in seconds,
            # leaving a buffer before the session expires.
            'interval': float_or_none(session_api_data.get('heartbeatLifetime'), scale=3000),
            'ping': ping
        }

        return info_dict, heartbeat_info_dict

    def _extract_format_for_quality(self, api_data, video_id, audio_quality, video_quality):
        """Build the format dict for one audio/video quality pair.

        The returned ``url`` is a pseudo-URL
        (``niconico_dmc:<video_id>/<video quality id>/<audio quality id>``)
        that ``_get_heartbeat_info`` later resolves.  ``api_data`` is accepted
        for interface compatibility but is not used here.
        """
        def parse_format_id(id_code):
            # Quality ids look like "[archive_]<codec>_[<N>kbps_][<N>p_]...";
            # a trailing "_" is appended so every component ends with one.
            mobj = re.match(r'''(?x)
                    (?:archive_)?
                    (?:(?P<codec>[^_]+)_)?
                    (?:(?P<br>\d+)kbps_)?
                    (?:(?P<res>\d+)p_)?
                ''', '%s_' % id_code)
            return mobj.groupdict() if mobj else {}

        protocol = 'niconico_dmc'
        format_id = '-'.join(map(lambda s: remove_start(s['id'], 'archive_'), [video_quality, audio_quality]))
        vdict = parse_format_id(video_quality['id'])
        adict = parse_format_id(audio_quality['id'])
        resolution = try_get(video_quality, lambda x: x['metadata']['resolution'], dict) or {'height': vdict.get('res')}
        # JSON integers decode to int, so accepting only float would discard
        # every integral bitrate and always fall back to the id-derived value.
        vbr = try_get(video_quality, lambda x: x['metadata']['bitrate'], (int, float))

        return {
            'url': '%s:%s/%s/%s' % (protocol, video_id, video_quality['id'], audio_quality['id']),
            'format_id': format_id,
            'format_note': 'DMC %s' % try_get(video_quality, lambda x: x['metadata']['label'], compat_str),
            'ext': 'mp4',  # Session API are used in HTML5, which always serves mp4
            'vcodec': vdict.get('codec'),
            'acodec': adict.get('codec'),
            'vbr': float_or_none(vbr, 1000) or float_or_none(vdict.get('br')),
            'abr': float_or_none(audio_quality.get('bitrate'), 1000) or float_or_none(adict.get('br')),
            'height': int_or_none(resolution.get('height', vdict.get('res'))),
            'width': int_or_none(resolution.get('width')),
            'quality': -2 if 'low' in format_id else -1,  # Default quality value is -1
            'protocol': protocol,
            'http_headers': {
                'Origin': 'https://www.nicovideo.jp',
                'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
            }
        }

    def _real_extract(self, url):
        """Extract formats and metadata for the video at ``url``.

        Raises ExtractorError when the thumb-info API reports the video as
        deleted, not found or otherwise unavailable, when the page exposes no
        DMC delivery data, or when ffprobe fails on the smile URL.
        """
        video_id = self._match_id(url)

        # Get video webpage for API data.
        webpage, handle = self._download_webpage_handle(
            'http://www.nicovideo.jp/watch/' + video_id, video_id)
        if video_id.startswith('so'):
            # "so" ids are re-resolved from the final URL after redirects.
            video_id = self._match_id(handle.geturl())

        api_data = self._parse_json(self._html_search_regex(
            'data-api-data="([^"]+)"', webpage,
            'API data', default='{}'), video_id)

        def get_video_info_web(items):
            # Tolerate API data without a "video" object instead of raising
            # KeyError.
            return dict_get(try_get(api_data, lambda x: x['video'], dict) or {}, items)

        # Get video info
        video_info_xml = self._download_xml(
            'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id,
            video_id, note='Downloading video info page')

        def get_video_info_xml(items):
            # Return the first non-empty text among the given element names.
            if not isinstance(items, list):
                items = [items]
            for item in items:
                ret = xpath_text(video_info_xml, './/' + item)
                if ret:
                    return ret

        if get_video_info_xml('error'):
            error_code = get_video_info_xml('code')

            if error_code == 'DELETED':
                raise ExtractorError('The video has been deleted.',
                                     expected=True)
            elif error_code == 'NOT_FOUND':
                raise ExtractorError('The video is not found.',
                                     expected=True)
            elif error_code == 'COMMUNITY':
                self.to_screen('%s: The video is community members only.' % video_id)
            else:
                raise ExtractorError('%s reports error: %s' % (self.IE_NAME, error_code))

        # Start extracting video formats
        formats = []

        # Get HTML5 videos info
        quality_info = try_get(api_data, lambda x: x['media']['delivery']['movie'])
        if not quality_info:
            raise ExtractorError('The video can\'t be downloaded', expected=True)

        for audio_quality in quality_info.get('audios') or {}:
            for video_quality in quality_info.get('videos') or {}:
                if not audio_quality.get('isAvailable') or not video_quality.get('isAvailable'):
                    continue
                formats.append(self._extract_format_for_quality(
                    api_data, video_id, audio_quality, video_quality))

        # Get flv/swf info
        timestamp = None
        video_real_url = try_get(api_data, lambda x: x['video']['smileInfo']['url'])
        if video_real_url:
            is_economy = video_real_url.endswith('low')

            if is_economy:
                self.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')

            # Invoking ffprobe to determine resolution
            pp = FFmpegPostProcessor(self._downloader)
            cookies = self._get_cookies('https://nicovideo.jp').output(header='', sep='; path=/; domain=nicovideo.jp;\n')

            self.to_screen('%s: %s' % (video_id, 'Checking smile format with ffprobe'))

            try:
                metadata = pp.get_metadata_object(video_real_url, ['-cookies', cookies])
            except PostProcessingError as err:
                raise ExtractorError(err.msg, expected=True)

            v_stream = a_stream = {}

            # Some complex swf files don't have a video stream (e.g. nm4809023)
            for stream in metadata['streams']:
                if stream['codec_type'] == 'video':
                    v_stream = stream
                elif stream['codec_type'] == 'audio':
                    a_stream = stream

            # Community restricted videos seem to have issues with the thumb API not returning anything at all
            filesize = int(
                (get_video_info_xml('size_high') if not is_economy else get_video_info_xml('size_low'))
                or metadata['format']['size']
            )
            # Prefer the type reported by the thumb API; otherwise derive it
            # from ffprobe.  The fallback is parenthesised explicitly: without
            # the parentheses the conditional expression binds looser than
            # `or`, so movie_type was ignored whenever the probed format name
            # did not contain "mp4".
            format_name = metadata['format']['format_name']
            extension = (
                get_video_info_xml('movie_type')
                or ('mp4' if 'mp4' in format_name else format_name)
            )

            # 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
            timestamp = (
                parse_iso8601(get_video_info_web('first_retrieve'))
                or unified_timestamp(get_video_info_web('postedDateTime'))
            )
            if extension != 'mp4':
                metadata_timestamp = (
                    parse_iso8601(try_get(v_stream, lambda x: x['tags']['creation_time']))
                    or timestamp)
            else:
                metadata_timestamp = 0

            # According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
            smile_threshold_timestamp = parse_iso8601('2016-12-08T00:00:00+09:00')

            # Either timestamp may be None when nothing could be parsed;
            # comparing None with an int raises TypeError on Python 3, so
            # unknown values are treated as "not source".
            is_source = (
                (timestamp is not None and timestamp < smile_threshold_timestamp)
                or (metadata_timestamp or 0) > 0)

            # If movie file size is unstable, old server movie is not source movie.
            if filesize > 1:
                formats.append({
                    'url': video_real_url,
                    'format_id': 'smile' if not is_economy else 'smile_low',
                    'format_note': 'SMILEVIDEO source' if not is_economy else 'SMILEVIDEO low quality',
                    'ext': extension,
                    'container': extension,
                    'vcodec': v_stream.get('codec_name'),
                    'acodec': a_stream.get('codec_name'),
                    # Some complex swf files don't have total bit rate metadata (e.g. nm6049209)
                    'tbr': int_or_none(metadata['format'].get('bit_rate'), scale=1000),
                    'vbr': int_or_none(v_stream.get('bit_rate'), scale=1000),
                    'abr': int_or_none(a_stream.get('bit_rate'), scale=1000),
                    'height': int_or_none(v_stream.get('height')),
                    'width': int_or_none(v_stream.get('width')),
                    'source_preference': 5 if not is_economy else -2,
                    'quality': 5 if is_source and not is_economy else None,
                    'filesize': filesize
                })

        self._sort_formats(formats)

        # Start extracting information
        title = (
            get_video_info_xml('title')  # prefer to get the untranslated original title
            or get_video_info_web(['originalTitle', 'title'])
            or self._og_search_title(webpage, default=None)
            or self._html_search_regex(
                r'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
                webpage, 'video title'))

        watch_api_data_string = self._html_search_regex(
            r'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
            webpage, 'watch api data', default=None)
        watch_api_data = self._parse_json(watch_api_data_string, video_id) if watch_api_data_string else {}
        video_detail = watch_api_data.get('videoDetail', {})

        thumbnail = (
            self._html_search_regex(r'<meta property="og:image" content="([^"]+)">', webpage, 'thumbnail data', default=None)
            or dict_get(  # choose highest from 720p to 240p
                # `or {}` keeps dict_get from failing on a missing thumbnail object
                get_video_info_web('thumbnail') or {},
                ['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
            or self._html_search_meta('image', webpage, 'thumbnail', default=None)
            or video_detail.get('thumbnail'))

        description = get_video_info_web('description')

        if not timestamp:
            match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
            if match:
                timestamp = parse_iso8601(match.replace('+', ':00+'))
        if not timestamp and video_detail.get('postedAt'):
            timestamp = parse_iso8601(
                video_detail['postedAt'].replace('/', '-'),
                delimiter=' ', timezone=datetime.timedelta(hours=9))
        timestamp = timestamp or try_get(api_data, lambda x: parse_iso8601(x['video']['registeredAt']))

        view_count = int_or_none(get_video_info_web(['view_counter', 'viewCount']))
        if not view_count:
            match = self._html_search_regex(
                r'>Views: <strong[^>]*>([^<]+)</strong>',
                webpage, 'view count', default=None)
            if match:
                view_count = int_or_none(match.replace(',', ''))
        view_count = (
            view_count
            or video_detail.get('viewCount')
            or try_get(api_data, lambda x: x['video']['count']['view']))

        comment_count = (
            int_or_none(get_video_info_web('comment_num'))
            or video_detail.get('commentCount')
            or try_get(api_data, lambda x: x['video']['count']['comment']))

        if not comment_count:
            match = self._html_search_regex(
                r'>Comments: <strong[^>]*>([^<]+)</strong>',
                webpage, 'comment count', default=None)
            if match:
                comment_count = int_or_none(match.replace(',', ''))

        duration = (parse_duration(
            get_video_info_web('length')
            or self._html_search_meta(
                'video:duration', webpage, 'video duration', default=None))
            or video_detail.get('length')
            or get_video_info_web('duration'))

        webpage_url = get_video_info_web('watch_url') or url

        # for channel movie and community movie
        channel_id = try_get(
            api_data,
            (lambda x: x['channel']['globalId'],
             lambda x: x['community']['globalId']))
        channel = try_get(
            api_data,
            (lambda x: x['channel']['name'],
             lambda x: x['community']['name']))

        # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
        # in the JSON, which will cause None to be returned instead of {}.
        owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
        uploader_id = str_or_none(
            get_video_info_web(['ch_id', 'user_id'])
            or owner.get('id')
            or channel_id
        )
        uploader = (
            get_video_info_web(['ch_name', 'user_nickname'])
            or owner.get('nickname')
            or channel
        )

        return {
            'id': video_id,
            # Kept so _get_heartbeat_info can skip re-downloading the page.
            '_api_data': api_data,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'uploader': uploader,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'channel': channel,
            'channel_id': channel_id,
            'view_count': view_count,
            'comment_count': comment_count,
            'duration': duration,
            'webpage_url': webpage_url,
        }
a9bad429
JMF
606
607
class NiconicoPlaylistIE(InfoExtractor):
    """Extractor for nicovideo.jp "mylist" playlists.

    Playlist metadata and entries are read from the nvapi v2 mylists
    endpoint; entries are fetched lazily, one page at a time.
    """

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
            'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
            'uploader': 'のっく',
            'uploader_id': '805442',
        },
        'playlist_mincount': 225,
    }, {
        'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
        'only_matching': True,
    }]

    # Headers sent with every nvapi.nicovideo.jp request made by this class.
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _real_extract(self, url):
        """Return a playlist result whose entries are loaded on demand."""
        list_id = self._match_id(url)
        page_size = 25

        def get_page_data(pagenum, pagesize):
            # `pagenum` is zero-based; the API's `page` parameter is one-based.
            # HTTPS is used for consistency with the other nvapi calls in this
            # file.
            response = self._download_json(
                'https://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
                query={'page': 1 + pagenum, 'pageSize': pagesize},
                headers=self._API_HEADERS)
            # Missing or null "data"/"mylist" yields an empty dict rather than
            # an AttributeError from chained .get() calls.
            return ((response or {}).get('data') or {}).get('mylist') or {}

        # A single-item page is enough to read the playlist-level metadata.
        mylist = get_page_data(0, 1)
        owner = mylist.get('owner') or {}

        def pagefunc(pagenum):
            page = get_page_data(pagenum, page_size)
            return ({
                '_type': 'url',
                'url': 'http://www.nicovideo.jp/watch/' + item['watchId'],
            } for item in (page.get('items') or [])
                # Entries without a watchId cannot be turned into a URL.
                if item.get('watchId'))

        return {
            '_type': 'playlist',
            'id': list_id,
            'title': mylist.get('name'),
            'description': mylist.get('description'),
            'uploader': owner.get('name'),
            'uploader_id': owner.get('id'),
            'entries': OnDemandPagedList(pagefunc, page_size),
        }
c1d3a4a8 662
663
class NiconicoUserIE(InfoExtractor):
    """Extractor for all videos uploaded by a nicovideo.jp user."""

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
    _TEST = {
        'url': 'https://www.nicovideo.jp/user/419948',
        'info_dict': {
            'id': '419948',
        },
        'playlist_mincount': 101,
    }
    # Placeholders, in order: user id, page size, one-based page number.
    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
    _PAGE_SIZE = 100

    # Headers sent with every nvapi.nicovideo.jp request made by this class.
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _entries(self, list_id):
        """Yield a url_result for every video of user ``list_id``.

        Pages are requested until ``totalCount`` (reported on the first page)
        entries have been yielded.  Iteration also stops at the first empty
        page, so a missing or overstated ``totalCount`` can neither raise a
        ``TypeError`` nor loop forever.
        """
        total_count = None  # unknown until the first page has been read
        count = page_num = 0
        while total_count is None or count < total_count:
            json_parsed = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
                headers=self._API_HEADERS,
                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
            data = json_parsed['data']
            if not page_num:
                total_count = int_or_none(data.get('totalCount'))
            items = data.get('items') or []
            if not items:
                break
            for entry in items:
                count += 1
                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
            page_num += 1

    def _real_extract(self, url):
        """Return a playlist of the user's videos, handled by NiconicoIE."""
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())