jfr.im git - yt-dlp.git/commitdiff
[BilibiliChannel] Fix pagination
author     pukkandan <redacted>
           Wed, 21 Apr 2021 22:43:04 +0000 (04:13 +0530)
committer  pukkandan <redacted>
           Wed, 21 Apr 2021 22:49:33 +0000 (04:19 +0530)
Closes #222

https://github.com/nao20010128nao/ytdl-patched/commit/ccca21d7f57f1330a205f07d369e31883ba006c2
Co-authored-by: nao20010128nao, pukkandan
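
The patch replaces the single fixed-page request (pn=1&ps=25) with an _entries generator that walks the space API page by page: it requests pn=1, 2, ... via itertools.count, yields a url_result for every bvid in data.list.vlist, and stops when a page comes back empty or the running total reaches data.page.count. The sketch below mirrors that pattern using only the standard library so it can be read in isolation; the real extractor goes through _download_webpage/_parse_json/try_get and yields url_result entries rather than bare IDs.

import itertools
import json
from urllib.request import urlopen

# Same endpoint as the patched _API_URL; the page size is left to the API default.
# NB: this illustrates the pagination loop only -- the live API may expect
# browser-like headers that a bare urlopen() does not send.
API_URL = 'https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp'


def channel_video_ids(mid):
    """Yield the bvid of every upload in a Bilibili space, one API page at a time."""
    count, max_count = 0, None
    for page_num in itertools.count(1):
        with urlopen(API_URL % (mid, page_num)) as resp:
            data = json.load(resp)['data']
        # Total upload count is reported on every page; remember the first value seen.
        max_count = max_count or (data.get('page') or {}).get('count')
        vlist = (data.get('list') or {}).get('vlist') or []
        if not vlist:  # an empty page means there is nothing left
            return
        for entry in vlist:
            yield entry['bvid']
        count += len(vlist)
        if max_count and count >= max_count:  # fetched everything the API reports
            return

Keeping both stop conditions matches the patch: the count check avoids one extra request when the total is known, while the empty-page check still terminates if the reported total is off.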

yt_dlp/extractor/bilibili.py

index 0029a7e49bd3934f316ab351c484c21e2c8d73ec..b7b60b77ebb6b2f1c5e187168d145e253e0767e5 100644 (file)
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import hashlib
+import itertools
 import json
 import re
 
@@ -498,28 +499,40 @@ def _real_extract(self, url):
 
 class BilibiliChannelIE(InfoExtractor):
     _VALID_URL = r'https?://space.bilibili\.com/(?P<id>\d+)'
-    # May need to add support for pagination? Need to find a user with many video uploads to test
-    _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=1&ps=25&jsonp=jsonp"
-    _TEST = {}  # TODO: Add tests
+    _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
+    _TESTS = [{
+        'url': 'https://space.bilibili.com/3985676/video',
+        'info_dict': {},
+        'playlist_mincount': 112,
+    }]
 
-    def _real_extract(self, url):
-        list_id = self._match_id(url)
-        json_str = self._download_webpage(self._API_URL % list_id, "None")
+    def _entries(self, list_id):
+        count, max_count = 0, None
 
-        json_parsed = json.loads(json_str)
-        entries = [{
-            '_type': 'url',
-            'ie_key': BiliBiliIE.ie_key(),
-            'url': ('https://www.bilibili.com/video/%s' %
-                    entry['bvid']),
-            'id': entry['bvid'],
-        } for entry in json_parsed['data']['list']['vlist']]
+        for page_num in itertools.count(1):
+            data = self._parse_json(
+                self._download_webpage(
+                    self._API_URL % (list_id, page_num), list_id,
+                    note='Downloading page %d' % page_num),
+                list_id)['data']
 
-        return {
-            '_type': 'playlist',
-            'id': list_id,
-            'entries': entries
-        }
+            max_count = max_count or try_get(data, lambda x: x['page']['count'])
+
+            entries = try_get(data, lambda x: x['list']['vlist'])
+            if not entries:
+                return
+            for entry in entries:
+                yield self.url_result(
+                    'https://www.bilibili.com/video/%s' % entry['bvid'],
+                    BiliBiliIE.ie_key(), entry['bvid'])
+
+            count += len(entries)
+            if max_count and count >= max_count:
+                return
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        return self.playlist_result(self._entries(list_id), list_id)
 
 
 class BiliBiliSearchIE(SearchInfoExtractor):
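
For a quick manual check of the fix outside the test runner, yt-dlp's Python API can be pointed at the channel from the new _TESTS entry. This is only an illustrative snippet and assumes the package is installed and importable as yt_dlp:

import yt_dlp

# Flat extraction is enough here: we only want to count playlist entries,
# not resolve each individual video.
with yt_dlp.YoutubeDL({'extract_flat': True, 'quiet': True}) as ydl:
    info = ydl.extract_info('https://space.bilibili.com/3985676/video', download=False)
    entries = list(info.get('entries') or [])
    # Before the fix this stopped at the first 25 results; the new test expects >= 112.
    print(len(entries))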