from __future__ import unicode_literals

import hashlib
import itertools
import json
import re
class BilibiliChannelIE(InfoExtractor):
    """Extract the videos uploaded by a Bilibili user space as a playlist.

    The upload list is fetched page by page from a JSON API and every listed
    video is delegated to ``BiliBiliIE`` through a URL result.
    """

    # Every dot of the host name is escaped so that only the literal
    # ``space.bilibili.com`` host matches (an unescaped ``.`` would accept
    # any character in that position).
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)'
    # Formatted with (user id, 1-based page number).
    _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {},
        'playlist_mincount': 112,
    }]

    def _entries(self, list_id):
        """Lazily yield one URL result per video listed for *list_id*.

        Pages are requested starting at 1 until a page contains no videos or
        the number of videos seen reaches the count reported by the API.

        :param list_id: numeric user id captured from the space URL.
        :raises KeyError: if a response has no top-level ``data`` key, or a
            listed video has no ``bvid`` key.
        """
        count, max_count = 0, None

        for page_num in itertools.count(1):
            data = self._parse_json(
                self._download_webpage(
                    self._API_URL % (list_id, page_num), list_id,
                    note='Downloading page %d' % page_num),
                list_id)['data']

            # Keep the first truthy count seen; later pages do not
            # overwrite it.
            # NOTE(review): presumably page.count is the user's total number
            # of uploads -- confirm against the API.
            max_count = max_count or try_get(data, lambda x: x['page']['count'])

            # NOTE(review): try_get is assumed to return None when the lookup
            # fails, which ends pagination below -- confirm.
            entries = try_get(data, lambda x: x['list']['vlist'])
            if not entries:
                # An empty (or missing) page means there is nothing left.
                return
            for entry in entries:
                yield self.url_result(
                    'https://www.bilibili.com/video/%s' % entry['bvid'],
                    BiliBiliIE.ie_key(), entry['bvid'])

            count += len(entries)
            # Stop without requesting an extra empty page once everything
            # the API announced has been yielded.
            if max_count and count >= max_count:
                return

    def _real_extract(self, url):
        """Return a playlist result for the user space at *url*.

        The playlist id is the numeric user id matched by ``_VALID_URL``;
        entries are produced lazily by ``_entries``.
        """
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id)
class BiliBiliSearchIE(SearchInfoExtractor):