]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/kankanews.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / kankanews.py
CommitLineData
e897bd82 1import hashlib
074b2fae 2import random
3import string
e897bd82 4import time
074b2fae 5import urllib.parse
6
7from .common import InfoExtractor
8
9
class KankaNewsIE(InfoExtractor):
    # Extractor for kankanews.com article pages of the form
    # /a/<date>/<numeric id>.shtml. The class is flagged with _WORKING = False.
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?kankanews\.com/a/\d+\-\d+\-\d+/(?P<id>\d+)\.shtml'
    _TESTS = [{
        'url': 'https://www.kankanews.com/a/2022-11-08/00310276054.shtml?appid=1088227',
        'md5': '05e126513c74b1258d657452a6f4eef9',
        'info_dict': {
            'id': '4485057',
            'url': 'http://mediaplay.kksmg.com/2022/11/08/h264_450k_mp4_1a388ad771e0e4cc28b0da44d245054e_ncm.mp4',
            'ext': 'mp4',
            'title': '视频|第23个中国记者节,我们在进博切蛋糕',
            'thumbnail': r're:^https?://.*\.jpg*',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the video embedded in the article at *url*.

        The article page supplies the ``omsid`` video id and the title; the
        media URL and thumbnail come from a signed request to the
        ``getvideo`` API endpoint.
        """
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)
        video_id = self._search_regex(r'omsid\s*=\s*"(\d+)"', html, 'video id')

        # Keys are inserted in the order they must appear in the signed query
        # string; 'sign' is added only after the signature has been computed.
        nonce_alphabet = string.ascii_lowercase + string.digits
        query = {
            'nonce': ''.join(random.choices(nonce_alphabet, k=8)),
            'omsid': video_id,
            'platform': 'pc',
            'timestamp': int(time.time()),
            'version': '1.0',
        }

        # Signature = md5(hex(md5(urlencoded query + '&' + fixed suffix))).
        # NOTE(review): the fixed suffix is presumably a site-side secret/salt.
        unsigned = urllib.parse.urlencode(query) + '&28c8edde3d61a0411511d3b1866f0636'
        inner_digest = hashlib.md5(unsigned.encode()).hexdigest()
        query['sign'] = hashlib.md5(inner_digest.encode()).hexdigest()

        response = self._download_json(
            'https://api-app.kankanews.com/kankan/pc/getvideo', video_id, query=query)
        video = response['result']['video']

        # The title is read from the article page, not from the API response.
        title = self._search_regex(r'g\.title\s*=\s*"([^"]+)"', html, 'title')
        return {
            'id': video_id,
            'url': video['videourl'],
            'title': title,
            'thumbnail': video.get('titlepic'),
        }