jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/ketnet.py
[extractor/generic] Avoid catastrophic backtracking in KVS regex
[yt-dlp.git] / yt_dlp / extractor / ketnet.py
1 from .canvas import CanvasIE
2 from .common import InfoExtractor
3 from ..compat import compat_urllib_parse_unquote
4 from ..utils import (
5 int_or_none,
6 parse_iso8601,
7 )
8
9
class KetnetIE(InfoExtractor):
    """Extractor for video pages hosted on ketnet.be.

    The page path is used to look up the video's metadata through the
    Ketnet GraphQL endpoint; the actual media is then handed over to
    ``CanvasIE`` via a ``url_transparent`` result.
    """

    # Everything after the host (any number of path segments) is captured
    # as the ``id`` group and is used as the display id.
    _VALID_URL = r'https?://(?:www\.)?ketnet\.be/(?P<id>(?:[^/]+/)*[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://www.ketnet.be/kijken/n/nachtwacht/3/nachtwacht-s3a1-de-greystook',
            'md5': '37b2b7bb9b3dcaa05b67058dc3a714a9',
            'info_dict': {
                'id': 'pbs-pub-aef8b526-115e-4006-aa24-e59ff6c6ef6f$vid-ddb815bf-c8e7-467b-8879-6bad7a32cebd',
                'ext': 'mp4',
                'title': 'Nachtwacht - Reeks 3: Aflevering 1',
                'description': 'De Nachtwacht krijgt te maken met een parasiet',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 1468.02,
                'timestamp': 1609225200,
                'upload_date': '20201229',
                'series': 'Nachtwacht',
                'season': 'Reeks 3',
                'episode': 'De Greystook',
                'episode_number': 1,
            },
            'expected_warnings': ['is not a supported codec', 'Unknown MIME type'],
        },
        {
            'url': 'https://www.ketnet.be/themas/karrewiet/jaaroverzicht-20200/karrewiet-het-jaar-van-black-mamba',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the Ketnet page at *url*.

        The display id matched from *url* is interpolated into a GraphQL
        query sent to ``senior-bff.ketnet.be``. The ``mediaReference`` and
        ``titleVideodetail`` fields of the returned video are mandatory
        (a missing key raises ``KeyError``); every other field is optional.
        """
        display_id = self._match_id(url)

        # The display id is substituted into the content path of the query.
        graphql_query = '''{
video(id: "content/ketnet/nl/%s.model.json") {
description
episodeNr
imageUrl
mediaReference
programTitle
publicationDate
seasonTitle
subtitleVideodetail
titleVideodetail
}
}''' % display_id

        response = self._download_json(
            'https://senior-bff.ketnet.be/graphql', display_id,
            query={'query': graphql_query})
        video = response['data']['video']

        # The media reference is percent-decoded before being used as the
        # id and as the last component of the asset URL.
        mz_id = compat_urllib_parse_unquote(video['mediaReference'])

        info = {
            '_type': 'url_transparent',
            'id': mz_id,
            'title': video['titleVideodetail'],
            'url': 'https://mediazone.vrt.be/api/v1/ketnet/assets/' + mz_id,
        }

        # Optional metadata copied verbatim: (result key, GraphQL field).
        for info_key, field in (
            ('thumbnail', 'imageUrl'),
            ('description', 'description'),
        ):
            info[info_key] = video.get(field)

        info['timestamp'] = parse_iso8601(video.get('publicationDate'))

        for info_key, field in (
            ('series', 'programTitle'),
            ('season', 'seasonTitle'),
            ('episode', 'subtitleVideodetail'),
        ):
            info[info_key] = video.get(field)

        info['episode_number'] = int_or_none(video.get('episodeNr'))
        # Delegate resolution of the asset URL to the Canvas extractor.
        info['ie_key'] = CanvasIE.ie_key()
        return info