X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/a44ca5a470e09b5170fc9c3a46733f050fadbfae..9f14daf22b4080ae1531a772ee7574959af4e2fa:/yt_dlp/extractor/niconico.py

diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py
index 4eb6ed070..210303759 100644
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
 import datetime
 import functools
 import itertools
@@ -10,8 +7,6 @@
 
 from .common import InfoExtractor, SearchInfoExtractor
 from ..compat import (
-    compat_parse_qs,
-    compat_urllib_parse_urlparse,
     compat_HTTPError,
 )
 from ..utils import (
@@ -35,6 +30,7 @@
     update_url_query,
     url_or_none,
     urlencode_postdata,
+    urljoin,
 )
 
 
@@ -195,7 +191,7 @@ def _perform_login(self, username, password):
         self._request_webpage(
             'https://account.nicovideo.jp/login', None,
             note='Acquiring Login session')
-        urlh = self._request_webpage(
+        page = self._download_webpage(
             'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
             note='Logging in', errnote='Unable to log in',
             data=urlencode_postdata(login_form_strs),
@@ -203,26 +199,39 @@ def _perform_login(self, username, password):
                 'Referer': 'https://account.nicovideo.jp/login',
                 'Content-Type': 'application/x-www-form-urlencoded',
             })
-        if urlh is False:
-            login_ok = False
-        else:
-            parts = compat_urllib_parse_urlparse(urlh.geturl())
-            if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
-                login_ok = False
+        if 'oneTimePw' in page:
+            post_url = self._search_regex(
+                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
+            page = self._download_webpage(
+                urljoin('https://account.nicovideo.jp', post_url), None,
+                note='Performing MFA', errnote='Unable to complete MFA',
+                data=urlencode_postdata({
+                    'otp': self._get_tfa_info('6 digits code')
+                }), headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                })
+            if 'oneTimePw' in page or 'formError' in page:
+                err_msg = self._html_search_regex(
+                    r'formError["\']+>(.*?)</div>', page, 'form_error',
+                    default='There\'s an error but the message can\'t be parsed.',
+                    flags=re.DOTALL)
+                self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
+                return False
+        login_ok = 'class="notice error"' not in page
         if not login_ok:
-            self.report_warning('unable to log in: bad username or password')
+            self.report_warning('Unable to log in: bad username or password')
         return login_ok
 
     def _get_heartbeat_info(self, info_dict):
         video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
-        dmc_protocol = info_dict['_expected_protocol']
+        dmc_protocol = info_dict['expected_protocol']
 
         api_data = (
             info_dict.get('_api_data')
             or self._parse_json(
                 self._html_search_regex(
                     'data-api-data="([^"]+)"',
-                    self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
+                    self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
                     'API data', default='{}'),
                 video_id))
 
@@ -369,7 +378,7 @@ def extract_video_quality(video_quality):
             'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
             'quality': -2 if 'low' in video_quality['id'] else None,
             'protocol': 'niconico_dmc',
-            '_expected_protocol': dmc_protocol,
+            'expected_protocol': dmc_protocol,  # XXX: This is not a documented field
             'http_headers': {
                 'Origin': 'https://www.nicovideo.jp',
                 'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
@@ -381,7 +390,7 @@ def _real_extract(self, url):
 
         try:
             webpage, handle = self._download_webpage_handle(
-                'http://www.nicovideo.jp/watch/' + video_id, video_id)
+                'https://www.nicovideo.jp/watch/' + video_id, video_id)
             if video_id.startswith('so'):
                 video_id = self._match_id(handle.geturl())
 
@@ -416,8 +425,6 @@ def get_video_info(*items, get_first=True, **kwargs):
             if fmt:
                 formats.append(fmt)
 
-        self._sort_formats(formats)
-
         # Start extracting information
         tags = None
         if webpage:
@@ -548,8 +555,7 @@ class NiconicoPlaylistBaseIE(InfoExtractor):
     }
 
     def _call_api(self, list_id, resource, query):
-        "Implement this in child class"
-        pass
+        raise NotImplementedError('Must be implemented in subclasses')
 
     @staticmethod
     def _parse_owner(item):
@@ -638,14 +644,14 @@ class NiconicoSeriesIE(InfoExtractor):
             'id': '110226',
         'title': 'ご立派ァ！のシリーズ',
         },
-        'playlist_mincount': 10,  # as of 2021/03/17
+        'playlist_mincount': 10,
     }, {
         'url': 'https://www.nicovideo.jp/series/12312/',
         'info_dict': {
             'id': '12312',
         'title': 'バトルスピリッツ　お勧めカード紹介(調整中)',
         },
-        'playlist_mincount': 97,  # as of 2021/03/17
+        'playlist_mincount': 103,
     }, {
         'url': 'https://nico.ms/series/203559',
         'only_matching': True,
@@ -663,7 +669,7 @@ def _real_extract(self, url):
             title = unescapeHTML(title)
         playlist = [
             self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
-            for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+            for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
         return self.playlist_result(playlist, list_id, title)
 
 
@@ -720,7 +726,7 @@ def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
             webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
             results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
             for item in results:
-                yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
+                yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item)
             if not results:
                 break