[extractor] Deprecate `_sort_formats`

[yt-dlp.git] / yt_dlp / extractor / niconico.py
diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py

index 4eb6ed070ad493f939c2dceb9cbf0dfa7586fa71..2103037596db0a90207c19503e5a7c5bdeab252d 100644 (file)
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@@ -1,6 +1,3 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
  import datetime
  import functools
  import itertools
@@ -10,8 +7,6 @@
  
  from .common import InfoExtractor, SearchInfoExtractor
  from ..compat import (
-    compat_parse_qs,
-    compat_urllib_parse_urlparse,
      compat_HTTPError,
  )
  from ..utils import (
@@ -35,6 +30,7 @@
      update_url_query,
      url_or_none,
      urlencode_postdata,
+    urljoin,
  )
  
  
@@ -195,7 +191,7 @@ def _perform_login(self, username, password):
          self._request_webpage(
              'https://account.nicovideo.jp/login', None,
              note='Acquiring Login session')
-        urlh = self._request_webpage(
+        page = self._download_webpage(
              'https://account.nicovideo.jp/login/redirector?show_button_twitter=1&site=niconico&show_button_facebook=1', None,
              note='Logging in', errnote='Unable to log in',
              data=urlencode_postdata(login_form_strs),
@@ -203,26 +199,39 @@ def _perform_login(self, username, password):
                  'Referer': 'https://account.nicovideo.jp/login',
                  'Content-Type': 'application/x-www-form-urlencoded',
              })
-        if urlh is False:
-            login_ok = False
-        else:
-            parts = compat_urllib_parse_urlparse(urlh.geturl())
-            if compat_parse_qs(parts.query).get('message', [None])[0] == 'cant_login':
-                login_ok = False
+        if 'oneTimePw' in page:
+            post_url = self._search_regex(
+                r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page, 'post url', group='url')
+            page = self._download_webpage(
+                urljoin('https://account.nicovideo.jp', post_url), None,
+                note='Performing MFA', errnote='Unable to complete MFA',
+                data=urlencode_postdata({
+                    'otp': self._get_tfa_info('6 digits code')
+                }), headers={
+                    'Content-Type': 'application/x-www-form-urlencoded',
+                })
+            if 'oneTimePw' in page or 'formError' in page:
+                err_msg = self._html_search_regex(
+                    r'formError["\']+>(.*?)</div>', page, 'form_error',
+                    default='There\'s an error but the message can\'t be parsed.',
+                    flags=re.DOTALL)
+                self.report_warning(f'Unable to log in: MFA challenge failed, "{err_msg}"')
+                return False
+        login_ok = 'class="notice error"' not in page
          if not login_ok:
-            self.report_warning('unable to log in: bad username or password')
+            self.report_warning('Unable to log in: bad username or password')
          return login_ok
  
      def _get_heartbeat_info(self, info_dict):
          video_id, video_src_id, audio_src_id = info_dict['url'].split(':')[1].split('/')
-        dmc_protocol = info_dict['_expected_protocol']
+        dmc_protocol = info_dict['expected_protocol']
  
          api_data = (
              info_dict.get('_api_data')
              or self._parse_json(
                  self._html_search_regex(
                      'data-api-data="([^"]+)"',
-                    self._download_webpage('http://www.nicovideo.jp/watch/' + video_id, video_id),
+                    self._download_webpage('https://www.nicovideo.jp/watch/' + video_id, video_id),
                      'API data', default='{}'),
                  video_id))
  
@@ -369,7 +378,7 @@ def extract_video_quality(video_quality):
              'width': traverse_obj(video_quality, ('metadata', 'resolution', 'width')),
              'quality': -2 if 'low' in video_quality['id'] else None,
              'protocol': 'niconico_dmc',
-            '_expected_protocol': dmc_protocol,
+            'expected_protocol': dmc_protocol,  # XXX: This is not a documented field
              'http_headers': {
                  'Origin': 'https://www.nicovideo.jp',
                  'Referer': 'https://www.nicovideo.jp/watch/' + video_id,
@@ -381,7 +390,7 @@ def _real_extract(self, url):
  
          try:
              webpage, handle = self._download_webpage_handle(
-                'http://www.nicovideo.jp/watch/' + video_id, video_id)
+                'https://www.nicovideo.jp/watch/' + video_id, video_id)
              if video_id.startswith('so'):
                  video_id = self._match_id(handle.geturl())
  
@@ -416,8 +425,6 @@ def get_video_info(*items, get_first=True, **kwargs):
              if fmt:
                  formats.append(fmt)
  
-        self._sort_formats(formats)
-
          # Start extracting information
          tags = None
          if webpage:
@@ -548,8 +555,7 @@ class NiconicoPlaylistBaseIE(InfoExtractor):
      }
  
      def _call_api(self, list_id, resource, query):
-        "Implement this in child class"
-        pass
+        raise NotImplementedError('Must be implemented in subclasses')
  
      @staticmethod
      def _parse_owner(item):
@@ -638,14 +644,14 @@ class NiconicoSeriesIE(InfoExtractor):
              'id': '110226',
              'title': 'ご立派ァ！のシリーズ',
          },
-        'playlist_mincount': 10,  # as of 2021/03/17
+        'playlist_mincount': 10,
      }, {
          'url': 'https://www.nicovideo.jp/series/12312/',
          'info_dict': {
              'id': '12312',
              'title': 'バトルスピリッツ　お勧めカード紹介(調整中)',
          },
-        'playlist_mincount': 97,  # as of 2021/03/17
+        'playlist_mincount': 103,
      }, {
          'url': 'https://nico.ms/series/203559',
          'only_matching': True,
@@ -663,7 +669,7 @@ def _real_extract(self, url):
              title = unescapeHTML(title)
          playlist = [
              self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
-            for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+            for v_id in re.findall(r'data-href=[\'"](?:https://www\.nicovideo\.jp)?/watch/([a-z0-9]+)', webpage)]
          return self.playlist_result(playlist, list_id, title)
  
  
@@ -720,7 +726,7 @@ def _entries(self, url, item_id, query=None, note='Downloading page %(page)s'):
              webpage = self._download_webpage(url, item_id, query=query, note=note % {'page': page_num})
              results = re.findall(r'(?<=data-video-id=)["\']?(?P<videoid>.*?)(?=["\'])', webpage)
              for item in results:
-                yield self.url_result(f'http://www.nicovideo.jp/watch/{item}', 'Niconico', item)
+                yield self.url_result(f'https://www.nicovideo.jp/watch/{item}', 'Niconico', item)
              if not results:
                  break