from __future__ import unicode_literals

import datetime
import json
import re

from .common import InfoExtractor
from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..compat import (
    compat_parse_qs,
    compat_str,
    compat_urllib_parse_urlparse,
)
from ..utils import (
    dict_get,
    ExtractorError,
    float_or_none,
    int_or_none,
    OnDemandPagedList,
    parse_duration,
    parse_iso8601,
    PostProcessingError,
    remove_start,
    str_or_none,
    try_get,
    unified_timestamp,
    urlencode_postdata,
    xpath_text,
)
class NiconicoIE(InfoExtractor):
    # NOTE(review): this block was reconstructed from a garbled paste; some
    # test fields (e.g. 'id', 'ext', 'duration', counters) were lost in the
    # mangling and are not re-invented here — confirm against upstream.
    _TESTS = [{
        'url': 'http://www.nicovideo.jp/watch/sm22312215',
        'md5': 'a5bad06f1347452102953f323c69da34s',
        'info_dict': {
            'title': 'Big Buck Bunny',
            'thumbnail': r're:https?://.*',
            'uploader': 'takuya0301',
            'uploader_id': '2698420',
            'upload_date': '20131123',
            'timestamp': int,  # timestamp is unstable
            'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
        },
        'skip': 'Requires an account',
    }, {
        # File downloaded with and without credentials are different, so omit
        # the md5 checksum
        'url': 'http://www.nicovideo.jp/watch/nm14296458',
        'info_dict': {
            'title': '【鏡音リン】Dance on media【オリジナル】take2!',
            'description': 'md5:689f066d74610b3b22e0f1739add0f58',
            'thumbnail': r're:https?://.*',
            'uploader_id': '18822557',
            'upload_date': '20110429',
            'timestamp': 1304065916,
        },
        'skip': 'Requires an account',
    }, {
        # 'video exists but is marked as "deleted"
        'url': 'http://www.nicovideo.jp/watch/sm10000',
        'info_dict': {
            'ext': 'unknown_video',
            'description': 'deleted',
            'title': 'ドラえもんエターナル第3話「決戦第3新東京市」<前編>',
            'thumbnail': r're:https?://.*',
            'upload_date': '20071224',
            'timestamp': int,  # timestamp field has different value if logged in
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://www.nicovideo.jp/watch/so22543406',
        'info_dict': {
            'title': '【第1回】RADIOアニメロミックス ラブライブ!~のぞえりRadio Garden~',
            'description': 'md5:b27d224bb0ff53d3c8269e9f8b561cf1',
            'thumbnail': r're:https?://.*',
            'timestamp': 1388851200,
            'upload_date': '20140104',
            'uploader': 'アニメロチャンネル',
        },
        'skip': 'The viewing period of the video you were searching for has expired.',
    }, {
        # video not available via `getflv`; "old" HTML5 video
        'url': 'http://www.nicovideo.jp/watch/sm1151009',
        'md5': '8fa81c364eb619d4085354eab075598a',
        'info_dict': {
            'title': 'マスターシステム本体内蔵のスペハリのメインテーマ(PSG版)',
            'description': 'md5:6ee077e0581ff5019773e2e714cdd0b7',
            'thumbnail': r're:https?://.*',
            'timestamp': 1190868283,
            'upload_date': '20070927',
            'uploader': 'denden2',
            'uploader_id': '1392194',
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://www.nicovideo.jp/watch/sm31464864',
        'info_dict': {
            'title': '新作TVアニメ「戦姫絶唱シンフォギアAXZ」PV 最高画質',
            'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
            'timestamp': 1498514060,
            'upload_date': '20170626',
            'uploader_id': '40826363',
            'thumbnail': r're:https?://.*',
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        # Video without owner
        'url': 'http://www.nicovideo.jp/watch/sm18238488',
        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
        'info_dict': {
            'title': '【実写版】ミュータントタートルズ',
            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
            'timestamp': 1341160408,
            'upload_date': '20120701',
            'thumbnail': r're:https?://.*',
            'comment_count': int,
        },
        'skip': 'Requires an account',
    }, {
        'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
        'only_matching': True,
    }]

    _VALID_URL = r'https?://(?:www\.|secure\.|sp\.)?nicovideo\.jp/watch/(?P<id>(?:[a-z]{2})?[0-9]+)'
    _NETRC_MACHINE = 'niconico'

    # Headers required by the nvapi.nicovideo.jp endpoints.
    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }
172 def _real_initialize(self
):
176 username
, password
= self
._get
_login
_info
()
177 # No authentication to be performed
184 'mail_tel': username
,
185 'password': password
,
187 urlh
= self
._request
_webpage
(
188 'https://account.nicovideo.jp/api/v1/login', None,
189 note
='Logging in', errnote
='Unable to log in',
190 data
=urlencode_postdata(login_form_strs
))
194 parts
= compat_urllib_parse_urlparse(urlh
.geturl())
195 if compat_parse_qs(parts
.query
).get('message', [None])[0] == 'cant_login':
198 self
.report_warning('unable to log in: bad username or password')
201 def _get_heartbeat_info(self
, info_dict
):
203 video_id
, video_src_id
, audio_src_id
= info_dict
['url'].split(':')[1].split('/')
206 info_dict
.get('_api_data')
208 self
._html
_search
_regex
(
209 'data-api-data="([^"]+)"',
210 self
._download
_webpage
('http://www.nicovideo.jp/watch/' + video_id
, video_id
),
211 'API data', default
='{}'),
214 session_api_data
= try_get(api_data
, lambda x
: x
['media']['delivery']['movie']['session'])
215 session_api_endpoint
= try_get(session_api_data
, lambda x
: x
['urls'][0])
220 'https://nvapi.nicovideo.jp/v1/2ab0cbaa/watch', video_id
,
221 query
={'t': try_get(api_data, lambda x: x['media']['delivery']['trackingId'])}
,
222 note
='Acquiring permission for downloading video',
223 headers
=self
._API
_HEADERS
),
224 lambda x
: x
['meta']['status'])
226 self
.report_warning('Failed to acquire permission for playing video. The video may not download.')
228 yesno
= lambda x
: 'yes' if x
else 'no'
231 if try_get(api_data
, lambda x
: x
['media']['delivery']['encryption']) is not None:
233 encryption
= self
._parse
_json
(session_api_data
['token'], video_id
)['hls_encryption']
234 session_api_http_parameters
= {
239 'encrypted_key': try_get(api_data
, lambda x
: x
['media']['delivery']['encryption']['encryptedKey']),
240 'key_uri': try_get(api_data
, lambda x
: x
['media']['delivery']['encryption']['keyUri'])
243 'transfer_preset': '',
244 'use_ssl': yesno(session_api_endpoint
['isSsl']),
245 'use_well_known_port': yesno(session_api_endpoint
['isWellKnownPort']),
246 'segment_duration': 6000,
253 session_api_http_parameters
= {
255 'http_output_download_parameters': {
256 'use_ssl': yesno(session_api_endpoint
['isSsl']),
257 'use_well_known_port': yesno(session_api_endpoint
['isWellKnownPort']),
262 session_response
= self
._download
_json
(
263 session_api_endpoint
['url'], video_id
,
264 query
={'_format': 'json'}
,
265 headers
={'Content-Type': 'application/json'}
,
266 note
='Downloading JSON metadata for %s' % info_dict
['format_id'],
270 'player_id': session_api_data
.get('playerId'),
273 'auth_type': try_get(session_api_data
, lambda x
: x
['authTypes'][session_api_data
['protocols'][0]]),
274 'content_key_timeout': session_api_data
.get('contentKeyTimeout'),
275 'service_id': 'nicovideo',
276 'service_user_id': session_api_data
.get('serviceUserId')
278 'content_id': session_api_data
.get('contentId'),
279 'content_src_id_sets': [{
280 'content_src_ids': [{
282 'audio_src_ids': [audio_src_id
],
283 'video_src_ids': [video_src_id
],
287 'content_type': 'movie',
291 'lifetime': session_api_data
.get('heartbeatLifetime')
294 'priority': session_api_data
.get('priority'),
298 'http_parameters': session_api_http_parameters
301 'recipe_id': session_api_data
.get('recipeId'),
302 'session_operation_auth': {
303 'session_operation_auth_by_signature': {
304 'signature': session_api_data
.get('signature'),
305 'token': session_api_data
.get('token'),
308 'timing_constraint': 'unlimited'
312 info_dict
['url'] = session_response
['data']['session']['content_uri']
313 info_dict
['protocol'] = protocol
316 heartbeat_info_dict
= {
317 'url': session_api_endpoint
['url'] + '/' + session_response
['data']['session']['id'] + '?_format=json&_method=PUT',
318 'data': json
.dumps(session_response
['data']),
319 # interval, convert milliseconds to seconds, then halve to make a buffer.
320 'interval': float_or_none(session_api_data
.get('heartbeatLifetime'), scale
=3000),
324 return info_dict
, heartbeat_info_dict
326 def _extract_format_for_quality(self
, api_data
, video_id
, audio_quality
, video_quality
):
327 def parse_format_id(id_code
):
328 mobj
= re
.match(r
'''(?x)
330 (?:(?P<codec>[^_]+)_)?
331 (?:(?P<br>[\d]+)kbps_)?
332 (?:(?P<res>[\d+]+)p_)?
333 ''', '%s_' % id_code
)
334 return mobj
.groupdict() if mobj
else {}
336 protocol
= 'niconico_dmc'
337 format_id
= '-'.join(map(lambda s
: remove_start(s
['id'], 'archive_'), [video_quality
, audio_quality
]))
338 vdict
= parse_format_id(video_quality
['id'])
339 adict
= parse_format_id(audio_quality
['id'])
340 resolution
= try_get(video_quality
, lambda x
: x
['metadata']['resolution'], dict) or {'height': vdict.get('res')}
341 vbr
= try_get(video_quality
, lambda x
: x
['metadata']['bitrate'], float)
344 'url': '%s:%s/%s/%s' % (protocol
, video_id
, video_quality
['id'], audio_quality
['id']),
345 'format_id': format_id
,
346 'format_note': 'DMC %s' % try_get(video_quality
, lambda x
: x
['metadata']['label'], compat_str
),
347 'ext': 'mp4', # Session API are used in HTML5, which always serves mp4
348 'vcodec': vdict
.get('codec'),
349 'acodec': adict
.get('codec'),
350 'vbr': float_or_none(vbr
, 1000) or float_or_none(vdict
.get('br')),
351 'abr': float_or_none(audio_quality
.get('bitrate'), 1000) or float_or_none(adict
.get('br')),
352 'height': int_or_none(resolution
.get('height', vdict
.get('res'))),
353 'width': int_or_none(resolution
.get('width')),
354 'quality': -2 if 'low' in format_id
else -1, # Default quality value is -1
355 'protocol': protocol
,
357 'Origin': 'https://www.nicovideo.jp',
358 'Referer': 'https://www.nicovideo.jp/watch/' + video_id
,
362 def _real_extract(self
, url
):
363 video_id
= self
._match
_id
(url
)
365 # Get video webpage for API data.
366 webpage
, handle
= self
._download
_webpage
_handle
(
367 'http://www.nicovideo.jp/watch/' + video_id
, video_id
)
368 if video_id
.startswith('so'):
369 video_id
= self
._match
_id
(handle
.geturl())
371 api_data
= self
._parse
_json
(self
._html
_search
_regex
(
372 'data-api-data="([^"]+)"', webpage
,
373 'API data', default
='{}'), video_id
)
375 def get_video_info_web(items
):
376 return dict_get(api_data
['video'], items
)
379 video_info_xml
= self
._download
_xml
(
380 'http://ext.nicovideo.jp/api/getthumbinfo/' + video_id
,
381 video_id
, note
='Downloading video info page')
383 def get_video_info_xml(items
):
384 if not isinstance(items
, list):
387 ret
= xpath_text(video_info_xml
, './/' + item
)
391 if get_video_info_xml('error'):
392 error_code
= get_video_info_xml('code')
394 if error_code
== 'DELETED':
395 raise ExtractorError('The video has been deleted.',
397 elif error_code
== 'NOT_FOUND':
398 raise ExtractorError('The video is not found.',
400 elif error_code
== 'COMMUNITY':
401 self
.to_screen('%s: The video is community members only.' % video_id
)
403 raise ExtractorError('%s reports error: %s' % (self
.IE_NAME
, error_code
))
405 # Start extracting video formats
408 # Get HTML5 videos info
409 quality_info
= try_get(api_data
, lambda x
: x
['media']['delivery']['movie'])
411 raise ExtractorError('The video can\'t be downloaded', expected
=True)
413 for audio_quality
in quality_info
.get('audios') or {}:
414 for video_quality
in quality_info
.get('videos') or {}:
415 if not audio_quality
.get('isAvailable') or not video_quality
.get('isAvailable'):
417 formats
.append(self
._extract
_format
_for
_quality
(
418 api_data
, video_id
, audio_quality
, video_quality
))
422 video_real_url
= try_get(api_data
, lambda x
: x
['video']['smileInfo']['url'])
424 is_economy
= video_real_url
.endswith('low')
427 self
.report_warning('Site is currently in economy mode! You will only have access to lower quality streams')
429 # Invoking ffprobe to determine resolution
430 pp
= FFmpegPostProcessor(self
._downloader
)
431 cookies
= self
._get
_cookies
('https://nicovideo.jp').output(header
='', sep
='; path=/; domain=nicovideo.jp;\n')
433 self
.to_screen('%s: %s' % (video_id
, 'Checking smile format with ffprobe'))
436 metadata
= pp
.get_metadata_object(video_real_url
, ['-cookies', cookies
])
437 except PostProcessingError
as err
:
438 raise ExtractorError(err
.msg
, expected
=True)
440 v_stream
= a_stream
= {}
442 # Some complex swf files doesn't have video stream (e.g. nm4809023)
443 for stream
in metadata
['streams']:
444 if stream
['codec_type'] == 'video':
446 elif stream
['codec_type'] == 'audio':
449 # Community restricted videos seem to have issues with the thumb API not returning anything at all
451 (get_video_info_xml('size_high') if not is_economy
else get_video_info_xml('size_low'))
452 or metadata
['format']['size']
455 get_video_info_xml('movie_type')
456 or 'mp4' if 'mp4' in metadata
['format']['format_name'] else metadata
['format']['format_name']
459 # 'creation_time' tag on video stream of re-encoded SMILEVIDEO mp4 files are '1970-01-01T00:00:00.000000Z'.
461 parse_iso8601(get_video_info_web('first_retrieve'))
462 or unified_timestamp(get_video_info_web('postedDateTime'))
464 metadata_timestamp
= (
465 parse_iso8601(try_get(v_stream
, lambda x
: x
['tags']['creation_time']))
466 or timestamp
if extension
!= 'mp4' else 0
469 # According to compconf, smile videos from pre-2017 are always better quality than their DMC counterparts
470 smile_threshold_timestamp
= parse_iso8601('2016-12-08T00:00:00+09:00')
472 is_source
= timestamp
< smile_threshold_timestamp
or metadata_timestamp
> 0
474 # If movie file size is unstable, old server movie is not source movie.
477 'url': video_real_url
,
478 'format_id': 'smile' if not is_economy
else 'smile_low',
479 'format_note': 'SMILEVIDEO source' if not is_economy
else 'SMILEVIDEO low quality',
481 'container': extension
,
482 'vcodec': v_stream
.get('codec_name'),
483 'acodec': a_stream
.get('codec_name'),
484 # Some complex swf files doesn't have total bit rate metadata (e.g. nm6049209)
485 'tbr': int_or_none(metadata
['format'].get('bit_rate'), scale
=1000),
486 'vbr': int_or_none(v_stream
.get('bit_rate'), scale
=1000),
487 'abr': int_or_none(a_stream
.get('bit_rate'), scale
=1000),
488 'height': int_or_none(v_stream
.get('height')),
489 'width': int_or_none(v_stream
.get('width')),
490 'source_preference': 5 if not is_economy
else -2,
491 'quality': 5 if is_source
and not is_economy
else None,
495 self
._sort
_formats
(formats
)
497 # Start extracting information
499 get_video_info_xml('title') # prefer to get the untranslated original title
500 or get_video_info_web(['originalTitle', 'title'])
501 or self
._og
_search
_title
(webpage
, default
=None)
502 or self
._html
_search
_regex
(
503 r
'<span[^>]+class="videoHeaderTitle"[^>]*>([^<]+)</span>',
504 webpage
, 'video title'))
506 watch_api_data_string
= self
._html
_search
_regex
(
507 r
'<div[^>]+id="watchAPIDataContainer"[^>]+>([^<]+)</div>',
508 webpage
, 'watch api data', default
=None)
509 watch_api_data
= self
._parse
_json
(watch_api_data_string
, video_id
) if watch_api_data_string
else {}
510 video_detail
= watch_api_data
.get('videoDetail', {})
513 self
._html
_search
_regex
(r
'<meta property="og:image" content="([^"]+)">', webpage
, 'thumbnail data', default
=None)
514 or dict_get( # choose highest from 720p to 240p
515 get_video_info_web('thumbnail'),
516 ['ogp', 'player', 'largeUrl', 'middleUrl', 'url'])
517 or self
._html
_search
_meta
('image', webpage
, 'thumbnail', default
=None)
518 or video_detail
.get('thumbnail'))
520 description
= get_video_info_web('description')
523 match
= self
._html
_search
_meta
('datePublished', webpage
, 'date published', default
=None)
525 timestamp
= parse_iso8601(match
.replace('+', ':00+'))
526 if not timestamp
and video_detail
.get('postedAt'):
527 timestamp
= parse_iso8601(
528 video_detail
['postedAt'].replace('/', '-'),
529 delimiter
=' ', timezone
=datetime
.timedelta(hours
=9))
530 timestamp
= timestamp
or try_get(api_data
, lambda x
: parse_iso8601(x
['video']['registeredAt']))
532 view_count
= int_or_none(get_video_info_web(['view_counter', 'viewCount']))
534 match
= self
._html
_search
_regex
(
535 r
'>Views: <strong[^>]*>([^<]+)</strong>',
536 webpage
, 'view count', default
=None)
538 view_count
= int_or_none(match
.replace(',', ''))
541 or video_detail
.get('viewCount')
542 or try_get(api_data
, lambda x
: x
['video']['count']['view']))
545 int_or_none(get_video_info_web('comment_num'))
546 or video_detail
.get('commentCount')
547 or try_get(api_data
, lambda x
: x
['video']['count']['comment']))
549 if not comment_count
:
550 match
= self
._html
_search
_regex
(
551 r
'>Comments: <strong[^>]*>([^<]+)</strong>',
552 webpage
, 'comment count', default
=None)
554 comment_count
= int_or_none(match
.replace(',', ''))
556 duration
= (parse_duration(
557 get_video_info_web('length')
558 or self
._html
_search
_meta
(
559 'video:duration', webpage
, 'video duration', default
=None))
560 or video_detail
.get('length')
561 or get_video_info_web('duration'))
563 webpage_url
= get_video_info_web('watch_url') or url
565 # for channel movie and community movie
566 channel_id
= try_get(
568 (lambda x
: x
['channel']['globalId'],
569 lambda x
: x
['community']['globalId']))
572 (lambda x
: x
['channel']['name'],
573 lambda x
: x
['community']['name']))
575 # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
576 # in the JSON, which will cause None to be returned instead of {}.
577 owner
= try_get(api_data
, lambda x
: x
.get('owner'), dict) or {}
578 uploader_id
= str_or_none(
579 get_video_info_web(['ch_id', 'user_id'])
584 get_video_info_web(['ch_name', 'user_nickname'])
585 or owner
.get('nickname')
591 '_api_data': api_data
,
594 'thumbnail': thumbnail
,
595 'description': description
,
596 'uploader': uploader
,
597 'timestamp': timestamp
,
598 'uploader_id': uploader_id
,
600 'channel_id': channel_id
,
601 'view_count': view_count
,
602 'comment_count': comment_count
,
603 'duration': duration
,
604 'webpage_url': webpage_url
,
class NiconicoPlaylistIE(InfoExtractor):
    """Extract a niconico "mylist" playlist via the nvapi v2 endpoint."""

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/mylist/27411728',
        'info_dict': {
            'id': '27411728',
            'title': 'AKB48のオールナイトニッポン',
            'description': 'md5:d89694c5ded4b6c693dea2db6e41aa08',
            'uploader_id': '805442',
        },
        'playlist_mincount': 225,
    }, {
        'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
        'only_matching': True,
    }]

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _real_extract(self, url):
        list_id = self._match_id(url)

        def get_page_data(pagenum, pagesize):
            # nvapi pages are 1-based; pagenum here is 0-based.
            return self._download_json(
                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
                query={'page': 1 + pagenum, 'pageSize': pagesize},
                headers=self._API_HEADERS).get('data').get('mylist')

        # Fetch a single item first to obtain the playlist metadata.
        data = get_page_data(0, 1)
        title = data.get('name')
        description = data.get('description')
        uploader = data.get('owner').get('name')
        uploader_id = data.get('owner').get('id')

        def pagefunc(pagenum):
            data = get_page_data(pagenum, 25)
            return ({
                '_type': 'url',
                'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
            } for item in data.get('items'))

        return {
            '_type': 'playlist',
            'id': list_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'entries': OnDemandPagedList(pagefunc, 25),
        }
class NiconicoUserIE(InfoExtractor):
    """Extract all videos uploaded by a niconico user as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/user/(?P<id>\d+)/?(?:$|[#?])'
    _TEST = {
        'url': 'https://www.nicovideo.jp/user/419948',
        'info_dict': {
            'id': '419948',
        },
        'playlist_mincount': 101,
    }

    _API_URL = "https://nvapi.nicovideo.jp/v1/users/%s/videos?sortKey=registeredAt&sortOrder=desc&pageSize=%s&page=%s"
    _PAGE_SIZE = 100

    _API_HEADERS = {
        'X-Frontend-ID': '6',
        'X-Frontend-Version': '0'
    }

    def _entries(self, list_id, ):
        """Yield url_result entries for every video of user ``list_id``.

        Pages through the nvapi listing until ``totalCount`` (reported by
        the first response) is reached.
        """
        total_count = 1
        count = page_num = 0
        while count < total_count:
            json_parsed = self._download_json(
                self._API_URL % (list_id, self._PAGE_SIZE, page_num + 1), list_id,
                headers=self._API_HEADERS,
                note='Downloading JSON metadata%s' % (' page %d' % page_num if page_num else ''))
            if not page_num:
                total_count = int_or_none(json_parsed['data'].get('totalCount'))
            for entry in json_parsed["data"]["items"]:
                count += 1
                yield self.url_result('https://www.nicovideo.jp/watch/%s' % entry['id'])
            page_num += 1

    def _real_extract(self, url):
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id, ie=NiconicoIE.ie_key())