]>
Commit | Line | Data |
---|---|---|
4dc19c09 NJ |
1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
8bdd16b4 | 3 | clean_html, |
4 | merge_dicts, | |
4877f905 GS |
5 | traverse_obj, |
6 | url_or_none, | |
4dc19c09 NJ |
7 | ) |
8 | ||
9 | ||
class LRTBaseIE(InfoExtractor):
    """Shared helpers for the LRT (Lithuanian National Radio and TV) extractors."""

    def _extract_js_var(self, webpage, var_name, default=None):
        # Matches `var_name = "value"` or `var_name = 'value'`; the opening
        # quote is captured in group 1 and backreferenced so the value
        # (group 2) may contain the other quote character.
        pattern = fr'{var_name}\s*=\s*(["\'])((?:(?!\1).)+)\1'
        display_name = var_name.replace('_', ' ')
        return self._search_regex(pattern, webpage, display_name, default, group=2)
16 | ||
17 | class LRTStreamIE(LRTBaseIE): | |
18 | _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/tiesiogiai/(?P<id>[\w-]+)' | |
19 | _TESTS = [{ | |
20 | 'url': 'https://www.lrt.lt/mediateka/tiesiogiai/lrt-opus', | |
21 | 'info_dict': { | |
22 | 'id': 'lrt-opus', | |
23 | 'live_status': 'is_live', | |
24 | 'title': 're:^LRT Opus.+$', | |
25 | 'ext': 'mp4' | |
26 | } | |
27 | }] | |
28 | ||
29 | def _real_extract(self, url): | |
30 | video_id = self._match_id(url) | |
31 | webpage = self._download_webpage(url, video_id) | |
32 | streams_data = self._download_json(self._extract_js_var(webpage, 'tokenURL'), video_id) | |
33 | ||
34 | formats, subtitles = [], {} | |
35 | for stream_url in traverse_obj(streams_data, ( | |
36 | 'response', 'data', lambda k, _: k.startswith('content')), expected_type=url_or_none): | |
37 | fmts, subs = self._extract_m3u8_formats_and_subtitles(stream_url, video_id, 'mp4', m3u8_id='hls', live=True) | |
38 | formats.extend(fmts) | |
39 | subtitles = self._merge_subtitles(subtitles, subs) | |
4877f905 GS |
40 | |
41 | stream_title = self._extract_js_var(webpage, 'video_title', 'LRT') | |
42 | return { | |
43 | 'id': video_id, | |
44 | 'formats': formats, | |
45 | 'subtitles': subtitles, | |
46 | 'is_live': True, | |
47 | 'title': f'{self._og_search_title(webpage)} - {stream_title}' | |
48 | } | |
49 | ||
50 | ||
51 | class LRTVODIE(LRTBaseIE): | |
8bdd16b4 | 52 | _VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))' |
2512b174 S |
53 | _TESTS = [{ |
54 | # m3u8 download | |
8bdd16b4 | 55 | 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene', |
4dc19c09 | 56 | 'info_dict': { |
8bdd16b4 | 57 | 'id': '2000127261', |
4dc19c09 | 58 | 'ext': 'mp4', |
8bdd16b4 | 59 | 'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė', |
60 | 'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa', | |
61 | 'duration': 3035, | |
62 | 'timestamp': 1604079000, | |
63 | 'upload_date': '20201030', | |
4877f905 GS |
64 | 'tags': ['LRT TELEVIZIJA', 'Beatos virtuvė', 'Beata Nicholson', 'Makaronai', 'Baklažanai', 'Vakarienė', 'Receptas'], |
65 | 'thumbnail': 'https://www.lrt.lt/img/2020/10/30/764041-126478-1287x836.jpg' | |
4dc19c09 | 66 | }, |
2512b174 S |
67 | }, { |
68 | # direct mp3 download | |
69 | 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/', | |
70 | 'md5': '389da8ca3cad0f51d12bed0c844f6a0a', | |
71 | 'info_dict': { | |
72 | 'id': '1013074524', | |
73 | 'ext': 'mp3', | |
74 | 'title': 'Kita tema 2016-09-05 15:05', | |
75 | 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5', | |
76 | 'duration': 3008, | |
77 | 'view_count': int, | |
78 | 'like_count': int, | |
4dc19c09 | 79 | }, |
2512b174 | 80 | }] |
4dc19c09 NJ |
81 | |
82 | def _real_extract(self, url): | |
5ad28e7f | 83 | path, video_id = self._match_valid_url(url).groups() |
4dc19c09 NJ |
84 | webpage = self._download_webpage(url, video_id) |
85 | ||
8bdd16b4 | 86 | media_url = self._extract_js_var(webpage, 'main_url', path) |
87 | media = self._download_json(self._extract_js_var( | |
88 | webpage, 'media_info_url', | |
89 | 'https://www.lrt.lt/servisai/stream_url/vod/media_info/'), | |
90 | video_id, query={'url': media_url}) | |
91 | jw_data = self._parse_jwplayer_data( | |
92 | media['playlist_item'], video_id, base_url=url) | |
f7e1d82d | 93 | |
8bdd16b4 | 94 | json_ld_data = self._search_json_ld(webpage, video_id) |
4dc19c09 | 95 | |
8bdd16b4 | 96 | tags = [] |
97 | for tag in (media.get('tags') or []): | |
98 | tag_name = tag.get('name') | |
99 | if not tag_name: | |
100 | continue | |
101 | tags.append(tag_name) | |
15aad84d | 102 | |
8bdd16b4 | 103 | clean_info = { |
104 | 'description': clean_html(media.get('content')), | |
105 | 'tags': tags, | |
4dc19c09 | 106 | } |
8bdd16b4 | 107 | |
108 | return merge_dicts(clean_info, jw_data, json_ld_data) |