jfr.im git - yt-dlp.git/blame - youtube_dl/extractor/lrt.py
[lrt] Add new extractor
[yt-dlp.git] / youtube_dl / extractor / lrt.py
CommitLineData
4dc19c09
NJ
1# coding: utf-8
2from __future__ import unicode_literals
3
4import re
5import json
6
7from .common import InfoExtractor
8from ..utils import (
9 determine_ext,
10 js_to_json,
11 parse_duration,
12 remove_end,
13)
14
15
class LRTIE(InfoExtractor):
    """Extractor for lrt.lt media library recordings (``/mediateka/irasas/<id>``)."""

    IE_NAME = 'lrt.lt'
    _VALID_URL = r'https?://(?:www\.)?lrt\.lt/mediateka/irasas/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.lrt.lt/mediateka/irasas/54391/',
        'info_dict': {
            'id': '54391',
            'ext': 'mp4',
            'title': 'Septynios Kauno dienos',
            'description': 'Kauno miesto ir apskrities naujienos',
            'duration': 1783,
        },
        'params': {
            'skip_download': True,  # HLS download
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single lrt.lt recording page.

        The page is downloaded once; title, thumbnail and description are
        read from its Open Graph tags, the duration from an inline
        ``'duration': '...'`` entry, and the formats from every inline
        ``config: {...}`` player object found in the page.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``thumbnail``, ``description`` and ``duration``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The Open Graph title carries a site suffix that is not part of
        # the recording's own title.
        title = remove_end(self._og_search_title(webpage), ' - LRT')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)
        # Duration is optional: ``default=None`` already makes a miss
        # non-fatal, so no separate ``fatal`` flag is needed.
        duration = parse_duration(self._search_regex(
            r"'duration':\s*'([^']+)',", webpage,
            'duration', default=None))

        formats = []
        for js in re.findall(r'(?s)config:\s*(\{.*?\})', webpage):
            # The config is a JavaScript object literal, not strict JSON.
            data = json.loads(js_to_json(js))
            # Use .get(): a config without a 'provider' key must fall
            # through to the m3u8 branch instead of raising KeyError.
            if data.get('provider') == 'rtmp':
                formats.append({
                    'format_id': 'rtmp',
                    'ext': determine_ext(data['file']),
                    'url': data['streamer'],
                    'play_path': 'mp4:%s' % data['file'],
                    'preference': -1,
                })
            else:
                formats.extend(
                    self._extract_m3u8_formats(data['file'], video_id, 'mp4'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
        }