]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/mediaklikk.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / mediaklikk.py
1 from ..utils import (
2 ExtractorError,
3 traverse_obj,
4 unified_strdate,
5 url_or_none,
6 )
7 from .common import InfoExtractor
8 from ..compat import (
9 compat_urllib_parse_unquote,
10 compat_str
11 )
12
13
class MediaKlikkIE(InfoExtractor):
    """Extractor for video pages on mediaklikk.hu, m4sport.hu, hirado.hu and petofilive.hu.

    The URL may optionally carry a ``/YYYY/MM/DD/`` segment before the slug;
    when present it is used as the upload date.
    """

    _VALID_URL = r'''(?x)https?://(?:www\.)?
                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/
                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
                        (?P<id>[^/#?_]+)'''

    _TESTS = [{
        # (old) mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
        'info_dict': {
            'id': '4754129',
            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
            'ext': 'mp4',
            'upload_date': '20210901',
            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
        'info_dict': {
            'id': '6696133',
            'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
            'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
            'ext': 'mp4',
            'upload_date': '20230903',
            'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
        }
    }, {
        # (old) m4sport
        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
        'info_dict': {
            'id': '4754999',
            'title': 'Gyémánt Liga, Párizs',
            'ext': 'mp4',
            'upload_date': '20210830',
            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # m4sport
        'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
        'info_dict': {
            'id': '6711136',
            'title': 'Atlétika – Gyémánt Liga, Brüsszel',
            'display_id': 'atletika-gyemant-liga-brusszel',
            'ext': 'mp4',
            'upload_date': '20230908',
            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg'
        }
    }, {
        # m4sport with *video/ url and no date
        'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
        'info_dict': {
            'id': '4492099',
            'title': 'Real Madrid - Chelsea 1-1',
            'display_id': 'real-madrid-chelsea-1-1',
            'ext': 'mp4',
            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
        }
    }, {
        # (old) hirado
        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
        'info_dict': {
            'id': '4760120',
            'title': 'Feltételeket szabott a főváros',
            'ext': 'mp4',
            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
        },
        'skip': 'Webpage redirects to video list page',
    }, {
        # hirado
        'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
        'info_dict': {
            'id': '6716068',
            'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
            'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
            'ext': 'mp4',
            'upload_date': '20230911',
            'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg'
        }
    }, {
        # (old) petofilive
        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
        'info_dict': {
            'id': '4571948',
            'title': 'Tha Shudras az Akusztikban',
            'ext': 'mp4',
            'upload_date': '20210607',
            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
        },
        'skip': 'Webpage redirects to empty page',
    }, {
        # petofilive
        'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
        'info_dict': {
            'id': '6713233',
            'title': 'Futball Fesztivál a Margitszigeten',
            'display_id': 'futball-fesztival-a-margitszigeten',
            'ext': 'mp4',
            'upload_date': '20230909',
            'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg'
        }
    }]

    def _real_extract(self, url):
        """Extract the info dict for a single video page.

        Downloads the page at *url*, reads the JSON argument of the embedded
        ``mtva_player_manager.player(...)`` call, forwards it to the
        ``player.php`` endpoint and picks the HLS entry from the playlist passed
        to ``pl.setup(...)`` on the player page.

        Raises ExtractorError when the player playlist has no usable HLS URL.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        # The player configuration is the second argument of the
        # `mtva_player_manager.player(document.getElementById(...), {...})` call.
        player_data_str = self._html_search_regex(
            r'mtva_player_manager\.player\(document\.getElementById\(.*\),\s?(\{.*\}).*\);',
            webpage, 'player data')
        # The JSON text is URL-unquoted before being parsed.
        player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote)
        video_id = compat_str(player_data['contentId'])

        # Title fallbacks: player data, then OpenGraph, then the article heading.
        title = (
            player_data.get('title')
            or self._og_search_title(webpage, fatal=False)
            or self._html_search_regex(
                r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title'))

        # The date segment of the URL is optional as a whole, so checking one
        # group is enough; avoid feeding a 'None-None-None' string to the parser.
        upload_date = None
        if mobj.group('year'):
            upload_date = unified_strdate(
                f'{mobj.group("year")}-{mobj.group("month")}-{mobj.group("day")}')
        if not upload_date:
            # Fall back to the date printed in the article markup, if any.
            upload_date = unified_strdate(self._html_search_regex(
                r'<p\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))

        # The player endpoint is queried with the same data, except that the
        # `token` value is sent under the `video` key.
        player_data['video'] = player_data.pop('token')
        player_page = self._download_webpage(
            'https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
        player_json = self._search_json(
            r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
        playlist_url = traverse_obj(
            player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
        if not playlist_url:
            raise ExtractorError('Unable to extract playlist url')

        formats = self._extract_wowza_formats(
            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])

        return {
            'id': video_id,
            'title': title,
            'display_id': display_id,
            'formats': formats,
            'upload_date': upload_date,
            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
        }