]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..utils import ( | |
5 | parse_duration, | |
6 | traverse_obj, | |
7 | unified_timestamp, | |
8 | ) | |
9 | ||
10 | ||
class RTVSIE(InfoExtractor):
    """Extractor for rtvs.sk radio and television archive pages."""

    # Accepts /radio/archiv/... and /televizia/archiv/... URLs; the last
    # numeric path component is captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)'
    _TESTS = [{
        # radio archive
        'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
        'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
        'info_dict': {
            'id': '414872',
            'ext': 'mp3',
            'title': 'Ostrov pokladov 1 časť.mp3',
            'duration': 2854,
            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg',
            'display_id': '135331',
        },
    }, {
        # tv archive
        'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
        'info_dict': {
            'id': '63118',
            'ext': 'mp4',
            'title': 'Amaro Džives - Náš deň',
            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.',
            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg',
            'timestamp': 1428555900,
            'upload_date': '20150409',
            'duration': 4986,
        },
    }, {
        # tv archive
        'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin',
        'info_dict': {
            'id': '18083',
            'ext': 'mp4',
            'title': 'Robin',
            'description': 'md5:2f70505a7b8364491003d65ff7a0940a',
            'timestamp': 1636652760,
            'display_id': '307655',
            'duration': 831,
            'upload_date': '20211111',
            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg',
        },
    }]

    def _real_extract(self, url):
        """Resolve an archive page URL into an info dict.

        Steps: fetch the archive page, locate the player iframe and its
        ``src``, fetch the iframe page, read the JSON endpoint prefix from
        its ``var url = "..." + ruurl`` statement, then download the JSON
        and build formats and metadata from the first playlist entry.
        """
        video_id = self._match_id(url)

        # The numeric suffix of the player iframe's id doubles as display_id.
        archive_page = self._download_webpage(url, video_id)
        iframe_id = self._search_regex(
            r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', archive_page, 'Iframe ID')
        iframe_src_pattern = (
            fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"')
        iframe_url = self._search_regex(iframe_src_pattern, archive_page, 'Iframe URL')

        player_page = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
        json_url = self._search_regex(
            r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', player_page, 'json URL')
        # The matched prefix is used without a scheme; the fixed query
        # parameters are appended directly to it.
        data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id)

        # A response carrying a single 'clip' is normalised to a
        # one-element playlist so both shapes share the lookups below.
        clip = data.get('clip')
        if clip:
            data['playlist'] = [clip]

        def first_entry(*keys):
            # Look up a (possibly nested) field of the first playlist entry.
            return traverse_obj(data, ('playlist', 0, *keys))

        source_url = first_entry('sources', 0, 'src')
        if first_entry('sources', 0, 'type') == 'audio/mp3':
            # MP3 sources are used as a single direct-URL format.
            formats = [{'url': source_url}]
        else:
            # Any other source type is handed to the m3u8 format extractor.
            formats = self._extract_m3u8_formats(source_url, video_id)

        return {
            'id': video_id,
            'display_id': iframe_id,
            'title': first_entry('title'),
            'description': first_entry('description'),
            'duration': parse_duration(first_entry('length')),
            'thumbnail': first_entry('image'),
            'timestamp': unified_timestamp(first_entry('datetime_create')),
            'formats': formats,
        }