X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/a44ca5a470e09b5170fc9c3a46733f050fadbfae..9f14daf22b4080ae1531a772ee7574959af4e2fa:/yt_dlp/extractor/niconico.py diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 4eb6ed070..210303759 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -1,6 +1,3 @@ -# coding: utf-8 -from __future__ import unicode_literals - import datetime import functools import itertools @@ -10,8 +7,6 @@ from .common import InfoExtractor, SearchInfoExtractor from ..compat import ( - compat_parse_qs, - compat_urllib_parse_urlparse, compat_HTTPError, ) from ..utils import ( @@ -35,6 +30,7 @@ update_url_query, url_or_none, urlencode_postdata, + urljoin, ) @@ -195,7 +191,7 @@ def _perform_login(self, username, password): self._request_webpage( 'https://account.nicovideo.jp/login', None, note='Acquiring Login session') - urlh = self._request_webpage( + page = self._download_webpage( 'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None, note='Logging in', errnote='Unable to log in', data=urlencode_postdata(login_form_strs), @@ -203,26 +199,39 @@ def _perform_login(self, username, password): 'Referer': 'https://account.nicovideo.jp/login', 'Content-Type': 'application/x-www-form-urlencoded', }) - if urlh is False: - login_ok = False - else: - parts = compat_urllib_parse_urlparse(urlh.geturl()) - if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login': - login_ok = False + if 'oneTimePw' in page: + post_url = self._search_regex( + r']+action=(["\'])(?P.+?)\1', page, 'post url', group='url') + page = self._download_webpage( + urljoin('https://account.nicovideo.jp', post_url), None, + note='Performing MFA', errnote='Unable to complete MFA', + data=urlencode_postdata({ + 'otp': self._get_tfa_info('6 digits code') + }), headers={ + 'Content-Type': 'application/x-www-form-urlencoded', + }) + if 'oneTimePw' in page or 'formError' in page: + err_msg = self._html_search_regex( + r'formError["\']+>(.*?)', page, 'form_error', + default='There\'s an error but the message can\'t be parsed.', + flags=re.DOTALL) + self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"') + return False + login_ok = 'class="notice error"' not in page if not login_ok: - self.report_warning('unable to log in: bad username or password') + self.report_warning('Unable to log in: bad username or password') return login_ok def _get_heartbeat_info(self, info_dict): video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/') - dmc_protocol = info_dict['_expected_protocol'] + dmc_protocol = info_dict['expected_protocol'] api_data = ( info_dict.get('_api_data') or self._parse_json( self._html_search_regex( 'data-api-data="([^"]+)"', - self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id), + self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id), 'API data', default='{}'), video_id)) @@ -369,7 +378,7 @@ def extract_video_quality(video_quality): 'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')), 'quality': -2 if 'low' in video_quality['id'] else None, 'protocol': 'niconico_dmc', - '_expected_protocol': dmc_protocol, + 'expected_protocol': dmc_protocol, # XXX: This is not a documented field 'http_headers': { 'Origin': 'https://www.nicovideo.jp', 'Referer': 'https://www.nicovideo.jp/watch/' + video_id, @@ -381,7 +390,7 @@ def _real_extract(self, url): try: webpage, handle = self._download_webpage_handle( - 'http://www.nicovideo.jp/watch/' + video_id, video_id) + 'https://www.nicovideo.jp/watch/' + video_id, video_id) if video_id.startswith('so'): video_id = self._match_id(handle.geturl()) @@ -416,8 +425,6 @@ def get_video_info(*items, get_first=True, **kwargs): if fmt: formats.append(fmt) - self._sort_formats(formats) - # Start extracting information tags = None if webpage: @@ -548,8 +555,7 @@ class NiconicoPlaylistBaseIE(InfoExtractor): } def _call_api(self, list_id, resource, query): - "Implement this in child class" - pass + raise NotImplementedError('Must be implemented in subclasses') @staticmethod def _parse_owner(item): @@ -638,14 +644,14 @@ class NiconicoSeriesIE(InfoExtractor): 'id': '110226', 'title': 'ご立派ァ!のシリーズ', }, - 'playlist_mincount': 10, # as of 2021/03/17 + 'playlist_mincount': 10, }, { 'url': 'https://www.nicovideo.jp/series/12312/', 'info_dict': { 'id': '12312', 'title': 'バトルスピリッツ お勧めカード紹介(調整中)', }, - 'playlist_mincount': 97, # as of 2021/03/17 + 'playlist_mincount': 103, }, { 'url': 'https://nico.ms/series/203559', 'only_matching': True, @@ -663,7 +669,7 @@ def _real_extract(self, url): title = unescapeHTML(title) playlist = [ self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id) - for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)] + for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)] return self.playlist_result(playlist, list_id, title) @@ -720,7 +726,7 @@ def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'): webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num}) results = re.findall(r'(?<=data-video-id=)["\']?(?P.*?)(?=["\'])', webpage) for item in results: - yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item) + yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item) if not results: break