]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/pladform.py
[extractor/FranceCulture] Fix extractor (#3874)
[yt-dlp.git] / yt_dlp / extractor / pladform.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 determine_ext,
6 ExtractorError,
7 int_or_none,
8 parse_qs,
9 xpath_text,
10 qualities,
11 )
12
13
class PladformIE(InfoExtractor):
    """Extractor for pladform.ru player and catalog URLs.

    Matches ``out.pladform.ru/player``, ``static.pladform.ru/player.swf``
    (both carrying a ``videoid`` query parameter) and
    ``video.pladform.ru/catalog/video/videoid/<id>`` pages.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                out\.pladform\.ru/player|
                                static\.pladform\.ru/player\.swf
                            )
                            \?.*\bvideoid=|
                            video\.pladform\.ru/catalog/video/videoid/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282',
        'info_dict': {
            'id': '6216d548e755edae6e8280667d774791',
            'ext': 'mp4',
            'timestamp': 1406117012,
            'title': 'Гарик Мартиросян и Гарик Харламов - Кастинг на концерт ко Дню милиции',
            'age_limit': 0,
            'upload_date': '20140723',
            'thumbnail': str,
            'view_count': int,
            'description': str,
            'category': list,
            'uploader_id': '12082',
            'uploader': 'Comedy Club',
            'duration': 367,
        },
        'expected_warnings': ['HTTP Error 404: Not Found']
    }, {
        'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
        'md5': '53362fac3a27352da20fa2803cc5cd6f',
        'info_dict': {
            'id': '3777899',
            'ext': 'mp4',
            'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко',
            'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3190,
        },
    }, {
        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'only_matching': True,
    }, {
        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_url(webpage):
        """Return the src of the first out.pladform.ru player iframe in *webpage*.

        The URL may be absolute or protocol-relative. Returns None when no
        such iframe is present.
        """
        mobj = re.search(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1', webpage)
        if mobj:
            return mobj.group('url')

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        Formats, duration and age limit come from the ``getVideo`` XML;
        title, description and thumbnail are read from the catalog page,
        with the XML used as a fallback for title and thumbnail.

        If the XML could not be downloaded, the original URL is requested
        and extraction is delegated to whatever URL it resolves to.

        Raises ExtractorError when the service reports an error, or when the
        XML is unavailable and the URL does not redirect anywhere else.
        """
        video_id = self._match_id(url)

        # "pl" is optional in the URL; fall back to '1' when it is absent.
        pl = parse_qs(url).get('pl', ['1'])[0]

        # fatal=False: a failed download is expected to produce False/None
        # instead of raising (confirm against InfoExtractor._download_xml).
        video = self._download_xml(
            'http://out.pladform.ru/getVideo', video_id, query={
                'pl': pl,
                'videoid': video_id,
            }, fatal=False)

        def fail(text):
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, text),
                expected=True)

        # Compare by identity rather than truthiness: an ElementTree element
        # without child nodes is falsy, so ``not video`` would wrongly treat
        # a text-only <error> reply as a failed download and skip the error
        # report below.
        if video is None or video is False:
            target_url = self._request_webpage(
                url, video_id, note='Resolving final URL').geturl()
            if target_url == url:
                raise ExtractorError('Can\'t parse page')
            return self.url_result(target_url)

        if video.tag == 'error':
            fail(video.text)

        quality = qualities(('ld', 'sd', 'hd'))

        formats = []
        for src in video.findall('./src'):
            format_url = src.text
            if not format_url:
                continue
            if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                format_id = src.get('quality')
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                    'quality': quality(format_id),
                })

        # With nothing playable, surface the message from <cap> if there is one.
        if not formats:
            error = xpath_text(video, './cap', 'error', default=None)
            if error:
                fail(error)

        self._sort_formats(formats)

        webpage = self._download_webpage(
            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
            video_id)

        # The title is mandatory: try the page's OpenGraph data first, then
        # the XML (fatal if neither provides it).
        title = self._og_search_title(webpage, fatal=False) or xpath_text(
            video, './/title', 'title', fatal=True)
        description = self._search_regex(
            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
            video, './/cover', 'cover')

        duration = int_or_none(xpath_text(video, './/time', 'duration'))
        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }