jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/mediaklikk.py
[VLive] Add upload_date and thumbnail (#1486)
[yt-dlp.git] / yt_dlp / extractor / mediaklikk.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from ..utils import (
5 unified_strdate
6 )
7 from .common import InfoExtractor
8 from ..compat import (
9 compat_urllib_parse_unquote,
10 compat_str
11 )
12
13
class MediaKlikkIE(InfoExtractor):
    """Extractor for video pages on mediaklikk.hu, m4sport.hu, hirado.hu and petofilive.hu.

    The URL must contain a path segment ending in ``video/`` or ``videok/``,
    optionally followed by a ``YYYY/M/D/`` date, and then the page slug that
    is used as the display id.
    """

    _VALID_URL = r'''(?x)^https?:\/\/(?:www\.)?
                        (?:mediaklikk|m4sport|hirado|petofilive)\.hu\/.*?videok?\/
                        (?:(?P<year>[0-9]{4})/(?P<month>[0-9]{1,2})/(?P<day>[0-9]{1,2})/)?
                        (?P<id>[^/#?_]+)'''

    _TESTS = [{
        # mediaklikk. date in html.
        'url': 'https://mediaklikk.hu/video/hazajaro-delnyugat-bacska-a-duna-menten-palankatol-doroszloig/',
        'info_dict': {
            'id': '4754129',
            'title': 'Hazajáró, DÉLNYUGAT-BÁCSKA – A Duna mentén Palánkától Doroszlóig',
            'ext': 'mp4',
            'upload_date': '20210901',
            'thumbnail': 'http://mediaklikk.hu/wp-content/uploads/sites/4/2014/02/hazajarouj_JO.jpg'
        }
    }, {
        # m4sport
        'url': 'https://m4sport.hu/video/2021/08/30/gyemant-liga-parizs/',
        'info_dict': {
            'id': '4754999',
            'title': 'Gyémánt Liga, Párizs',
            'ext': 'mp4',
            'upload_date': '20210830',
            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/08/vlcsnap-2021-08-30-18h21m20s10-1024x576.jpg'
        }
    }, {
        # m4sport with *video/ url and no date
        'url': 'https://m4sport.hu/bl-video/real-madrid-chelsea-1-1/',
        'info_dict': {
            'id': '4492099',
            'title': 'Real Madrid - Chelsea 1-1',
            'ext': 'mp4',
            'thumbnail': 'http://m4sport.hu/wp-content/uploads/sites/4/2021/04/Sequence-01.Still001-1024x576.png'
        }
    }, {
        # hirado
        'url': 'https://hirado.hu/videok/felteteleket-szabott-a-fovaros/',
        'info_dict': {
            'id': '4760120',
            'title': 'Feltételeket szabott a főváros',
            'ext': 'mp4',
            'thumbnail': 'http://hirado.hu/wp-content/uploads/sites/4/2021/09/vlcsnap-2021-09-01-20h20m37s165.jpg'
        }
    }, {
        # petofilive
        'url': 'https://petofilive.hu/video/2021/06/07/tha-shudras-az-akusztikban/',
        'info_dict': {
            'id': '4571948',
            'title': 'Tha Shudras az Akusztikban',
            'ext': 'mp4',
            'upload_date': '20210607',
            'thumbnail': 'http://petofilive.hu/wp-content/uploads/sites/4/2021/06/vlcsnap-2021-06-07-22h14m23s915-1024x576.jpg'
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single video page.

        Steps: download the page, read the JSON argument of the embedded
        ``mtva_player_manager.player(...)`` call, request the player page with
        that data as the query string, and collect formats from the
        ``playlist.m3u8`` URL found there.

        Returns a dict with ``id``, ``title``, ``display_id``, ``formats``,
        ``upload_date`` (may be None) and ``thumbnail``.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        # The second argument of the player() call is a JSON object literal.
        player_data_str = self._html_search_regex(
            r'mtva_player_manager\.player\(document.getElementById\(.*\),\s?(\{.*\}).*\);', webpage, 'player data')
        # URL-unquoting is applied through _parse_json's third argument
        # (presumably the source transform run before JSON decoding).
        player_data = self._parse_json(player_data_str, display_id, compat_urllib_parse_unquote)
        video_id = compat_str(player_data['contentId'])
        title = player_data.get('title') or self._og_search_title(webpage, fatal=False) or \
            self._html_search_regex(r'<h\d+\b[^>]+\bclass="article_title">([^<]+)<', webpage, 'title')

        # The date part of the URL is optional as a whole, so the three groups
        # are either all set or all None. Only build a date string when it is
        # present instead of feeding 'None-None-None' to unified_strdate.
        year, month, day = mobj.group('year', 'month', 'day')
        upload_date = None
        if year:
            upload_date = unified_strdate('%s-%s-%s' % (year, month, day))
        if not upload_date:
            # Fall back to the date printed in the article markup, if any.
            upload_date = unified_strdate(self._html_search_regex(
                r'<p\b[^>]+\bclass="article_date">([^<]+)<', webpage, 'upload date', default=None))

        # The player endpoint is queried with the same data, except that the
        # 'token' value is sent under the key 'video'.
        player_data['video'] = player_data.pop('token')
        player_page = self._download_webpage(
            'https://player.mediaklikk.hu/playernew/player.php', video_id, query=player_data)

        # The playlist URL is embedded with JSON-escaped slashes ("\/\/host\/...");
        # unquote it, undo the escaping and add a scheme if it is protocol-relative.
        escaped_playlist_url = self._html_search_regex(
            r'\"file\":\s*\"(\\?/\\?/.*playlist\.m3u8)\"', player_page, 'playlist_url')
        playlist_url = self._proto_relative_url(
            compat_urllib_parse_unquote(escaped_playlist_url).replace('\\/', '/'))

        formats = self._extract_wowza_formats(
            playlist_url, video_id, skip_protocols=['f4m', 'smil', 'dash'])
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'display_id': display_id,
            'formats': formats,
            'upload_date': upload_date,
            'thumbnail': player_data.get('bgImage') or self._og_search_thumbnail(webpage)
        }