]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/lrt.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
13 class LRTIE(InfoExtractor
):
# Matches http(s) URLs on lrt.lt (optionally www.) whose path is
# /mediateka/irasas/<digits>. Group "path" captures that whole path prefix,
# group "id" captures the digits; anything after the digits is not matched.
_VALID_URL = r'https?://(?:www\.)?lrt\.lt(?P<path>/mediateka/irasas/(?P<id>[0-9]+))'
18 'url': 'https://www.lrt.lt/mediateka/irasas/2000127261/greita-ir-gardu-sicilijos-ikvepta-klasikiniu-makaronu-su-baklazanais-vakariene',
19 'md5': '85cb2bb530f31d91a9c65b479516ade4',
23 'title': 'Greita ir gardu: Sicilijos įkvėpta klasikinių makaronų su baklažanais vakarienė',
24 'description': 'md5:ad7d985f51b0dc1489ba2d76d7ed47fa',
26 'timestamp': 1604079000,
27 'upload_date': '20201030',
31 'url': 'http://www.lrt.lt/mediateka/irasas/1013074524/',
32 'md5': '389da8ca3cad0f51d12bed0c844f6a0a',
36 'title': 'Kita tema 2016-09-05 15:05',
37 'description': 'md5:1b295a8fc7219ed0d543fc228c931fb5',
def _extract_js_var(self, webpage, var_name, default):
    """Return the quoted string assigned to ``var_name`` inside ``webpage``.

    Looks for text of the form ``<var_name> = "<value>"`` (either quote
    style, optional whitespace around ``=``) and asks ``self._search_regex``
    for capture group 2, i.e. ``<value>`` without the surrounding quotes.

    Args:
        webpage: text to search.
        var_name: name to look for. It is interpolated into the pattern
            unescaped, so it is interpreted as regex source.
        default: forwarded to ``self._search_regex`` as its fourth
            positional argument.

    Returns:
        Whatever ``self._search_regex`` returns for this pattern.
    """
    # Group 1 remembers the opening quote; group 2 then takes one or more
    # characters that are not that same quote, up to the matching closer.
    # The value must therefore be non-empty and may contain the other quote.
    pattern = r'%s\s*=\s*(["\'])((?:(?!\1).)+)\1' % var_name
    # Name handed to _search_regex: the variable name with "_" -> " ".
    display_name = var_name.replace('_', ' ')
    return self._search_regex(
        pattern, webpage, display_name, default, group=2)
49 def _real_extract(self, url):
50 path, video_id = re.match(self._VALID_URL, url).groups()
51 webpage = self._download_webpage(url, video_id)
53 media_url = self._extract_js_var(webpage, 'main_url
', path)
54 media = self._download_json(self._extract_js_var(
55 webpage, 'media_info_url
',
56 'https
://www
.lrt
.lt
/servisai
/stream_url
/vod
/media_info
/'),
57 video_id, query={'url': media_url})
58 jw_data = self._parse_jwplayer_data(
59 media['playlist_item
'], video_id, base_url=url)
61 json_ld_data = self._search_json_ld(webpage, video_id)
64 for tag in (media.get('tags
') or []):
65 tag_name = tag.get('name
')
71 'description
': clean_html(media.get('content
')),
75 return merge_dicts(clean_info, jw_data, json_ld_data)