]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/karaoketv.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / karaoketv.py
1 from .common import InfoExtractor
2
3
class KaraoketvIE(InfoExtractor):
    """Extractor for video pages on karaoketv.co.il.

    The watch page embeds an ``api_play.php`` iframe hosted on
    karaoke.co.il, which in turn embeds a video-cdn.com player iframe.
    The player page defines a JavaScript ``options`` object holding the clip
    URL (used as the RTMP play path) and, optionally, a ``settings`` object
    listing the servers to stream from.
    """

    _VALID_URL = r'https?://(?:www\.)?karaoketv\.co\.il/[^/]+/(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.karaoketv.co.il/%D7%A9%D7%99%D7%A8%D7%99_%D7%A7%D7%A8%D7%99%D7%95%D7%A7%D7%99/58356/%D7%90%D7%99%D7%96%D7%95%D7%9F',
        'info_dict': {
            'id': '58356',
            'ext': 'flv',
            'title': 'קריוקי של איזון',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }

    # Server used when the player page advertises no usable server list.
    _DEFAULT_SERVERS = ('wowzail.video-cdn.com:80/vodcdn', )

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        Walks the two nested iframes down to the video-cdn.com player page,
        reads the play path and the server list from it, and returns a dict
        with ``id``, ``title`` and one RTMP format per server.
        """
        video_id = self._match_id(url)

        # Step 1: the watch page embeds the karaoke.co.il API player iframe.
        webpage = self._download_webpage(url, video_id)
        api_page_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.karaoke\.co\.il/api_play\.php\?.+?)\1',
            webpage, 'API play URL', group='url')

        # Step 2: the API page embeds the actual video-cdn.com player iframe.
        api_page = self._download_webpage(api_page_url, video_id)
        video_cdn_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>https?://www\.video-cdn\.com/embed/iframe/.+?)\1',
            api_page, 'video cdn URL', group='url')

        # Step 3: the player page carries the stream parameters as JS objects.
        video_cdn = self._download_webpage(video_cdn_url, video_id)
        options = self._parse_json(
            self._search_regex(
                r'var\s+options\s*=\s*({.+?});', video_cdn, 'options'),
            video_id)
        play_path = options['clip']['url']

        # ``settings`` is optional: a missing or unparsable object simply
        # means that no server list is available.
        settings = self._parse_json(
            self._search_regex(
                r'var\s+settings\s*=\s*({.+?});', video_cdn, 'servers', default='{}'),
            video_id, fatal=False) or {}

        # The list comes from page-supplied JSON, so validate its entries
        # too: anything that is not a non-empty string would either crash on
        # ``startswith`` below or yield a bare ``rtmp://`` URL.
        servers = settings.get('servers')
        if isinstance(servers, list):
            servers = [
                server for server in servers
                if server and isinstance(server, str)]
        else:
            servers = None
        if not servers:
            servers = self._DEFAULT_SERVERS

        formats = []
        for server in servers:
            # Entries may or may not already carry the rtmp scheme.
            if server.startswith('rtmp'):
                rtmp_url = server
            else:
                rtmp_url = 'rtmp://%s' % server
            formats.append({
                'url': rtmp_url,
                'play_path': play_path,
                'app': 'vodcdn',
                'page_url': video_cdn_url,
                'player_url': 'http://www.video-cdn.com/assets/flowplayer/flowplayer.commercial-3.2.18.swf',
                'rtmp_real_time': True,
                'ext': 'flv',
            })

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
        }