]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/markiza.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / markiza.py
CommitLineData
ce0edda0
S
1import re
2
3from .common import InfoExtractor
4from ..compat import compat_str
5from ..utils import (
6 orderedSet,
7 parse_duration,
8 try_get,
9)
10
11
class MarkizaIE(InfoExtractor):
    """Extractor for videoarchiv.markiza.sk ``/video/...`` and ``/embed/...`` URLs."""

    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?videoarchiv\.markiza\.sk/(?:video/(?:[^/]+/)*|embed/)(?P<id>\d+)(?:[_/]|$)'
    _TESTS = [{
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723_oteckovia-109',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139078',
            'ext': 'mp4',
            'title': 'Oteckovia 109',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2760,
        },
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/televizne-noviny/televizne-noviny/85430_televizne-noviny',
        'info_dict': {
            'id': '85430',
            'title': 'Televízne noviny',
        },
        'playlist_count': 23,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/oteckovia/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/84723',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/filmy/85190_kamenak',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/video/reflex/zo-zakulisia/84651_pribeh-alzbetky',
        'only_matching': True,
    }, {
        'url': 'http://videoarchiv.markiza.sk/embed/85295',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the JW Player JSON for the id in *url* and return the parsed info.

        A playlist result gets its ``id`` and ``title`` filled in from the
        JSON; any other result gets a ``duration`` parsed from the JSON.
        """
        media_id = self._match_id(url)

        player_json = self._download_json(
            'http://videoarchiv.markiza.sk/json/video_jwplayer7.json',
            media_id, query={'id': media_id})

        result = self._parse_jwplayer_data(
            player_json, m3u8_id='hls', mpd_id='dash')

        if result.get('_type') != 'playlist':
            # Single item: only the duration is taken from the "details" object.
            raw_duration = try_get(
                player_json, lambda x: x['details']['duration'], compat_str)
            result['duration'] = parse_duration(raw_duration)
            return result

        # Playlist: identify it by the requested id and the "details" name.
        result['id'] = media_id
        result['title'] = try_get(
            player_json, lambda x: x['details']['name'], compat_str)
        return result
69
70
class MarkizaPageIE(InfoExtractor):
    """Extractor for markiza.sk / tvnoviny.sk article pages that embed archive videos."""

    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?(?:(?:[^/]+\.)?markiza|tvnoviny)\.sk/(?:[^/]+/)*(?P<id>\d+)_'
    _TESTS = [{
        'url': 'http://www.markiza.sk/soubiz/zahranicny/1923705_oteckovia-maju-svoj-den-ti-slavni-nie-su-o-nic-menej-rozkosni',
        'md5': 'ada4e9fad038abeed971843aa028c7b0',
        'info_dict': {
            'id': '139355',
            'ext': 'mp4',
            'title': 'Oteckovia 110',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 2604,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://dajto.markiza.sk/filmy-a-serialy/1774695_frajeri-vo-vegas',
        'only_matching': True,
    }, {
        'url': 'http://superstar.markiza.sk/aktualne/1923870_to-je-ale-telo-spevacka-ukazala-sexy-postavicku-v-bikinach',
        'only_matching': True,
    }, {
        'url': 'http://hybsa.markiza.sk/aktualne/1923790_uzasna-atmosfera-na-hybsa-v-poprade-superstaristi-si-prve-koncerty-pred-davom-ludi-poriadne-uzili',
        'only_matching': True,
    }, {
        'url': 'http://doma.markiza.sk/filmy/1885250_moja-vysnivana-svadba',
        'only_matching': True,
    }, {
        'url': 'http://www.tvnoviny.sk/domace/1923887_po-smrti-manzela-ju-cakalo-poriadne-prekvapenie',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs that MarkizaIE accepts; otherwise use the inherited check."""
        if MarkizaIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Collect every player id found in the page and return them as a playlist."""
        playlist_id = self._match_id(url)

        # Downloading for some hosts (e.g. dajto, doma) fails with 500
        # although everything seems to be OK, so considering 500
        # status code to be expected.
        webpage = self._download_webpage(
            url, playlist_id, expected_status=500)

        # Ids are de-duplicated with orderedSet before building the entries.
        video_ids = orderedSet(re.findall(
            r'(?:initPlayer_|data-entity=["\']|id=["\']player_)(\d+)',
            webpage))

        return self.playlist_result(
            [self.url_result('http://videoarchiv.markiza.sk/video/%s' % video_id)
             for video_id in video_ids],
            playlist_id)