jfr.im git - yt-dlp.git/commitdiff
[niconico] Fix extraction of thumbnails and uploader (#3266)
author: Lesmiscore (Naoya Ozaki) <redacted>
Fri, 1 Apr 2022 10:31:58 +0000 (19:31 +0900)
committer: GitHub <redacted>
Fri, 1 Apr 2022 10:31:58 +0000 (19:31 +0900)
yt_dlp/extractor/niconico.py
yt_dlp/utils.py

index 74828f8331be29140c7b3c3cd87bf47ba5bee1d5..a5a1a01e09a17f3c64a6ce2e0000319d7a2987e1 100644 (file)
     parse_duration,
     parse_filesize,
     parse_iso8601,
+    parse_resolution,
+    qualities,
     remove_start,
+    str_or_none,
     traverse_obj,
     try_get,
     unescapeHTML,
@@ -430,18 +433,25 @@ def get_video_info(*items, get_first=True, **kwargs):
             # find in json (logged in)
             tags = traverse_obj(api_data, ('tag', 'items', ..., 'name'))
 
+        thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp'])
+
         return {
             'id': video_id,
             '_api_data': api_data,
             'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None),
             'formats': formats,
-            'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta(
-                ('image', 'og:image'), webpage, 'thumbnail', default=None),
+            'thumbnails': [{
+                'id': key,
+                'url': url,
+                'ext': 'jpg',
+                'preference': thumb_prefs(key),
+                **parse_resolution(url, lenient=True),
+            } for key, url in (get_video_info('thumbnail') or {}).items() if url],
             'description': clean_html(get_video_info('description')),
-            'uploader': traverse_obj(api_data, ('owner', 'nickname')),
+            'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')),
+            'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))),
             'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601(
                 self._html_search_meta('video:release_date', webpage, 'date published', default=None)),
-            'uploader_id': traverse_obj(api_data, ('owner', 'id')),
             'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')),
             'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')),
             'view_count': int_or_none(get_video_info('count', 'view')),
index a2fa29afe71bb56869e4ffe837d1133a9822f888..ce918750d4285e491bbe69b905e4fa8816b3abd1 100644 (file)
@@ -2418,11 +2418,14 @@ def parse_count(s):
         return str_to_int(mobj.group(1))
 
 
-def parse_resolution(s):
+def parse_resolution(s, *, lenient=False):
     if s is None:
         return {}
 
-    mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
+    if lenient:
+        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
+    else:
+        mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
     if mobj:
         return {
             'width': int(mobj.group('w')),