jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/kanal2.py
[extractor/youtube] Extract DRC formats
[yt-dlp.git] / yt_dlp / extractor / kanal2.py
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 join_nonempty,
5 traverse_obj,
6 unified_timestamp,
7 update_url_query,
8 )
9
10
class Kanal2IE(InfoExtractor):
    """Extractor for kanal2.postimees.ee pages that carry a numeric ``id`` query parameter."""

    _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
    _TESTS = [{
        'note': 'Test standard url (#5575)',
        'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
        'md5': '7ea7b16266ec1798743777df241883dd',
        'info_dict': {
            'id': '40792',
            'ext': 'mp4',
            'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': 'md5:53cabf3c5d73150d594747f727431248',
            'upload_date': '20160805',
            'timestamp': 1470420000,
        },
    }]

    def _real_extract(self, url):
        """Fetch the player playlist JSON for the URL's id and build the info dict."""
        video_id = self._match_id(url)
        playlist_url = f'https://kanal2.postimees.ee/player/playlist/{video_id}'
        playlist = self._download_json(
            playlist_url, video_id,
            query={'type': 'episodes'},
            headers={'X-Requested-With': 'XMLHttpRequest'})

        # Title is "<title> / <subtitle>", skipping whichever part is missing.
        title_parts = traverse_obj(playlist, ('info', ('title', 'subtitle')))
        title = join_nonempty(*title_parts, delim=' / ')
        description = traverse_obj(playlist, ('info', 'description'))
        thumbnail = traverse_obj(playlist, ('data', 'image'))
        formats = self.get_formats(playlist, video_id)

        # The date/time is read from a trailing "(DD.MM.YYYY HH:MM)" in the
        # subtitle; an empty string is used when the pattern does not match.
        # NOTE(review): the offset is hard-coded to +0200 -- confirm whether
        # daylight-saving time needs to be taken into account.
        aired = self._search_regex(
            r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
            traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'timestamp': unified_timestamp(aired + ' +0200'),
        }

    def get_formats(self, playlist, video_id):
        """Register a streaming session and collect formats for every stream file.

        The ``data.path`` value of *playlist* is sent as the ``X-Original-URI``
        header to the session endpoint; the returned session value is then
        appended as the ``s`` query parameter to each stream URL before the
        m3u8 formats are extracted.

        Raises ExtractorError when the playlist has no path, or when the
        session response is not ``reason == 'OK'`` with a non-empty session.
        """
        stream_path = traverse_obj(playlist, ('data', 'path'))
        if not stream_path:
            raise ExtractorError('Path value not found in playlist JSON response')

        request_headers = {
            'X-Original-URI': stream_path,
            'Accept': 'application/json',
        }
        session = self._download_json(
            'https://sts.postimees.ee/session/register', video_id,
            note='Creating session', errnote='Error creating session',
            headers=request_headers)

        session_ok = session.get('reason') == 'OK' and session.get('session')
        if not session_ok:
            reason = session.get('reason', 'unknown error')
            raise ExtractorError(f'Unable to obtain session: {reason}')

        # Guaranteed present and truthy by the check above.
        token = session['session']
        return [
            fmt
            for stream_url in traverse_obj(playlist, ('data', 'streams', ..., 'file'))
            for fmt in self._extract_m3u8_formats(
                update_url_query(stream_url, {'s': token}), video_id, 'mp4')
        ]