]>
Commit | Line | Data |
---|---|---|
549cb2a8 B |
1 | import re |
2 | ||
df58ecbe S |
3 | from .common import InfoExtractor |
4 | ||
549cb2a8 B |
5 | from ..utils import ( |
6 | parse_duration, | |
7 | traverse_obj, | |
8 | unified_timestamp, | |
9 | ) | |
10 | ||
df58ecbe S |
11 | |
class RTVSIE(InfoExtractor):
    """Extractor for rtvs.sk radio and television archive pages.

    The archive page embeds a player iframe; the iframe page in turn names a
    JSON endpoint that describes the clip (title, sources, thumbnail, ...).
    """

    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)'
    _TESTS = [
        {
            # Radio archive entry (direct mp3 source).
            'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
            'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
            'info_dict': {
                'id': '414872',
                'ext': 'mp3',
                'title': 'Ostrov pokladov 1 časť.mp3',
                'duration': 2854,
                'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg',
                'display_id': '135331',
            },
        },
        {
            # TV archive entry addressed by series id and episode id.
            'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
            'info_dict': {
                'id': '63118',
                'ext': 'mp4',
                'title': 'Amaro Džives - Náš deň',
                'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.',
                'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg',
                'timestamp': 1428555900,
                'upload_date': '20150409',
                'duration': 4986,
            },
        },
        {
            # TV archive entry with a single numeric id and a query string.
            'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin',
            'info_dict': {
                'id': '18083',
                'ext': 'mp4',
                'title': 'Robin',
                'description': 'md5:2f70505a7b8364491003d65ff7a0940a',
                'timestamp': 1636652760,
                'display_id': '307655',
                'duration': 831,
                'upload_date': '20211111',
                'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the archive page at *url*.

        Steps: fetch the archive page, locate the player iframe and its
        numeric id, fetch the iframe page, read the JSON endpoint from its
        inline script, then turn the first playlist entry into formats and
        metadata.
        """
        video_id = self._match_id(url)
        archive_page = self._download_webpage(url, video_id)

        # The numeric suffix of the iframe's id attribute doubles as display_id.
        iframe_id = self._search_regex(
            r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', archive_page, 'Iframe ID')
        # Look the same iframe up again, this time capturing its src attribute.
        iframe_src_re = fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"'
        iframe_url = self._search_regex(iframe_src_re, archive_page, 'Iframe URL')

        player_page = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
        json_url = self._search_regex(
            r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', player_page, 'json URL')
        # The captured URL has no scheme, so "https:" is prepended; the appended
        # query values presumably describe the client (browser/platform) -- TODO confirm.
        data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id)

        # A response carrying a single 'clip' is normalised to a one-item playlist
        # so that everything below can address ('playlist', 0, ...) uniformly.
        if data.get('clip'):
            data['playlist'] = [data['clip']]

        def entry(*path):
            # Safe lookup relative to the first playlist item.
            return traverse_obj(data, ('playlist', 0, *path))

        media_url = entry('sources', 0, 'src')
        if entry('sources', 0, 'type') == 'audio/mp3':
            # mp3 sources are used as a direct download URL.
            formats = [{'url': media_url}]
        else:
            # Any other source type is handed to the m3u8 format extractor.
            formats = self._extract_m3u8_formats(media_url, video_id)
        self._sort_formats(formats)

        return {
            'id': video_id,
            'display_id': iframe_id,
            'title': entry('title'),
            'description': entry('description'),
            'duration': parse_duration(entry('length')),
            'thumbnail': entry('image'),
            'timestamp': unified_timestamp(entry('datetime_create')),
            'formats': formats,
        }