]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/mediaklikk.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / mediaklikk.py
CommitLineData
add96eb9 1import urllib.parse
2
e897bd82 3from .common import InfoExtractor
bccdbd22 4from ..utils import (
6e07e4bc 5 ExtractorError,
6 traverse_obj,
7 unified_strdate,
8 url_or_none,
bccdbd22 9)
bccdbd22 10
11
class MediaKlikkIE(InfoExtractor):
    """Extractor for video pages on mediaklikk.hu, m4sport.hu, hirado.hu and petofilive.hu."""

    # The path must contain a segment ending in "video", "videok" or "cikk";
    # an optional /YYYY/MM/DD/ date may precede the slug captured as `id`.
    _VALID_URL = r'''(?x)https?://(?:www\.)?
                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu/.*?(?:videok?|cikk)/
                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
                        (?P<id>[^/#?_]+)'''

    _TESTS = [{
        # (old) mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
        'info_dict': {
            'id': '4754129',
            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
            'ext': 'mp4',
            'upload_date': '20210901',
            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-fabova-hegyseg-kishont-koronaja/',
        'info_dict': {
            'id': '6696133',
            'title': 'Hazajáró, Fabova-hegység - Kishont koronája',
            'display_id': 'hazajaro-fabova-hegyseg-kishont-koronaja',
            'ext': 'mp4',
            'upload_date': '20230903',
            'thumbnail': 'https://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg',
        },
    }, {
        # (old) m4sport
        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
        'info_dict': {
            'id': '4754999',
            'title': 'Gyémánt Liga, Párizs',
            'ext': 'mp4',
            'upload_date': '20210830',
            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg',
        },
        'skip': 'Webpage redirects to 404 page',
    }, {
        # m4sport
        'url': 'https://m4sport.hu/sportkozvetitesek/video/2023/09/08/atletika-gyemant-liga-brusszel/',
        'info_dict': {
            'id': '6711136',
            'title': 'Atlétika – Gyémánt Liga, Brüsszel',
            'display_id': 'atletika-gyemant-liga-brusszel',
            'ext': 'mp4',
            'upload_date': '20230908',
            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-08-22h43m18s691.jpg',
        },
    }, {
        # m4sport with *video/ url and no date
        'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
        'info_dict': {
            'id': '4492099',
            'title': 'Real Madrid - Chelsea 1-1',
            'display_id': 'real-madrid-chelsea-1-1',
            'ext': 'mp4',
            'thumbnail': 'https://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png',
        },
    }, {
        # (old) hirado
        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
        'info_dict': {
            'id': '4760120',
            'title': 'Feltételeket szabott a főváros',
            'ext': 'mp4',
            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg',
        },
        'skip': 'Webpage redirects to video list page',
    }, {
        # hirado
        'url': 'https://hirado.hu/belfold/video/2023/09/11/marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
        'info_dict': {
            'id': '6716068',
            'title': 'Marad az éves elszámolás a napelemekre beruházó családoknál',
            'display_id': 'marad-az-eves-elszamolas-a-napelemekre-beruhazo-csaladoknal',
            'ext': 'mp4',
            'upload_date': '20230911',
            'thumbnail': 'https://hirado.hu/wp-content/uploads/sites/4/2023/09/vlcsnap-2023-09-11-09h16m09s882.jpg',
        },
    }, {
        # (old) petofilive
        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
        'info_dict': {
            'id': '4571948',
            'title': 'Tha Shudras az Akusztikban',
            'ext': 'mp4',
            'upload_date': '20210607',
            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg',
        },
        'skip': 'Webpage redirects to empty page',
    }, {
        # petofilive
        'url': 'https://petofilive.hu/video/2023/09/09/futball-fesztival-a-margitszigeten/',
        'info_dict': {
            'id': '6713233',
            'title': 'Futball Fesztivál a Margitszigeten',
            'display_id': 'futball-fesztival-a-margitszigeten',
            'ext': 'mp4',
            'upload_date': '20230909',
            'thumbnail': 'https://petofilive.hu/wp-content/uploads/sites/4/2023/09/Clipboard11-2.jpg',
        },
    }]

    def _real_extract(self, url):
        """Extract the video described by a matching page URL.

        Downloads the page, reads the JSON argument of the embedded
        ``mtva_player_manager.player(...)`` call, requests the player page
        with that data as query parameters, and takes the HLS playlist URL
        from the ``pl.setup(...)`` JSON found there.

        Returns an info dict with ``id``, ``title``, ``display_id``,
        ``formats``, ``upload_date`` and ``thumbnail``.

        Raises ExtractorError when no valid HLS playlist URL is found in the
        player JSON.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        player_data_str = self._html_search_regex(
            r'mtva_player_manager\.player\(document\.getElementById\(.*\),\s?(\{.*\}).*\);',
            webpage, 'player data')
        # urllib.parse.unquote is handed to _parse_json as its third argument
        # (presumably the source transform, so the JSON is URL-decoded first).
        player_data = self._parse_json(player_data_str, display_id, urllib.parse.unquote)
        video_id = str(player_data['contentId'])

        # Title preference: player data, then OpenGraph, then the article heading.
        title = (
            player_data.get('title')
            or self._og_search_title(webpage, fatal=False)
            or self._html_search_regex(
                r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title'))

        # The date part of the URL is optional; only build a date string when
        # it actually matched, instead of formatting None values and relying
        # on the parser to reject the result.
        date_parts = mobj.group('year', 'month', 'day')
        upload_date = unified_strdate('-'.join(date_parts)) if all(date_parts) else None
        if not upload_date:
            # Fall back to the date printed in the article markup.
            upload_date = unified_strdate(self._html_search_regex(
                r'<p\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))

        # The player endpoint receives the token under the 'video' key.
        player_data['video'] = player_data.pop('token')
        player_page = self._download_webpage(
            'https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)
        player_json = self._search_json(
            r'\bpl\.setup\s*\(', player_page, 'player json', video_id, end_pattern=r'\);')
        # Select the 'file' of a playlist entry whose type is 'hls', keeping
        # it only if url_or_none accepts it.
        playlist_url = traverse_obj(
            player_json, ('playlist', lambda _, v: v['type'] == 'hls', 'file', {url_or_none}), get_all=False)
        if not playlist_url:
            raise ExtractorError('Unable to extract playlist url')

        formats = self._extract_wowza_formats(
            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])

        return {
            'id': video_id,
            'title': title,
            'display_id': display_id,
            'formats': formats,
            'upload_date': upload_date,
            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage),
        }