]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/lrt.py
2 from __future__
import unicode_literals
5 from .common
import InfoExtractor
12 class LRTIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
17 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
18 'md5': '85cb2bb530f31d91a9c65b479516ade4',
22 'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
23 'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
25 'timestamp': 1604079000,
26 'upload_date': '20201030',
30 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
31 'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
35 'title': 'Kita tema 2016-09-05 15:05',
36 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
def _extract_js_var(self, webpage, var_name, default):
    """Look up a quoted value assigned to *var_name* inside *webpage*.

    The pattern matches ``<var_name> = "<value>"`` or
    ``<var_name> = '<value>'`` (optional whitespace around ``=``) and
    the text between the matching quotes (regex group 2) is requested
    from ``self._search_regex``.

    *default* is forwarded positionally to ``self._search_regex``;
    presumably it is what comes back when nothing matches -- confirm
    against the base-class helper.
    """
    # Group 1 captures the opening quote; the negative lookahead on \1
    # stops the value at the first occurrence of that same quote.
    pattern = r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name
    # Label handed to the search helper: the variable name with
    # underscores turned into spaces.
    label = var_name.replace('_', ' ')
    return self._search_regex(pattern, webpage, label, default, group=2)
48 def _real_extract(self, url):
49 path, video_id = self._match_valid_url(url).groups()
50 webpage = self._download_webpage(url, video_id)
52 media_url = self._extract_js_var(webpage, 'main_url
', path)
53 media = self._download_json(self._extract_js_var(
54 webpage, 'media_info_url
',
55 'https
://www
.lrt
.lt
/servisai
/stream_url
/vod
/media_info
/'),
56 video_id, query={'url': media_url})
57 jw_data = self._parse_jwplayer_data(
58 media['playlist_item
'], video_id, base_url=url)
60 json_ld_data = self._search_json_ld(webpage, video_id)
63 for tag in (media.get('tags
') or []):
64 tag_name = tag.get('name
')
70 'description
': clean_html(media.get('content
')),
74 return merge_dicts(clean_info, jw_data, json_ld_data)