jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/sport5.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / sport5.py
1 from .common import InfoExtractor
2 from ..utils import ExtractorError
3
4
class Sport5IE(InfoExtractor):
    """Extractor for video pages on sport5.co.il.

    The numeric ``Vi``/``docID`` query parameter of the page URL is only used
    to label the page download. The clip itself is identified by the
    ``clipId`` value found in the page, which is then used to fetch an XML
    metadata document listing the title, posters, categories and playback
    links of the clip.
    """

    # The subdomain and its trailing dot are optional *as a unit*, so the bare
    # ``sport5.co.il`` host matches in addition to ``www.`` and ``vod.``.
    _VALID_URL = r'https?://(?:(?:www|vod)\.)?sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
            'info_dict': {
                'id': 's5-Y59xx1-GUh2',
                'ext': 'mp4',
                'title': 'ולנסיה-קורדובה 0:3',
                'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
                'duration': 228,
                'categories': list,
            },
            'skip': 'Blocked outside of Israel',
        }, {
            'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
            'info_dict': {
                'id': 's5-SiXxx1-hKh2',
                'ext': 'mp4',
                'title': 'GOALS_CELTIC_270914.mp4',
                'description': '',
                'duration': 87,
                'categories': list,
            },
            'skip': 'Blocked outside of Israel',
        }, {
            # Host without a subdomain; only checks that the URL is recognised.
            'url': 'http://sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
            'only_matching': True,
        },
    ]

    @staticmethod
    def _xml_text(parent, path):
        """Return the text of the first element matching *path*, or None if there is none."""
        node = parent.find(path)
        return node.text if node is not None else None

    @staticmethod
    def _safe_int(value):
        """Return *value* converted to int, or None if it is missing or not numeric."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Build the info dict for the clip embedded in the page at *url*.

        Raises:
            ExtractorError: (expected) when the metadata document contains an
                ``Error`` element; its ``Name`` and ``Description`` are
                included in the message.
        """
        media_id = self._match_valid_url(url).group('id')
        webpage = self._download_webpage(url, media_id)

        video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id')

        metadata = self._download_xml(
            f'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/{video_id}/HDS/metadata.xml',
            video_id)

        error = metadata.find('./Error')
        if error is not None:
            # Use the tolerant lookup so an incomplete error element still
            # surfaces as an ExtractorError rather than an AttributeError.
            raise ExtractorError(
                '{} returned error: {} - {}'.format(
                    self.IE_NAME,
                    self._xml_text(error, './Name'),
                    self._xml_text(error, './Description')),
                expected=True)

        # Posters without a URL are unusable, so they are dropped; missing or
        # malformed dimensions simply become None.
        thumbnails = [{
            'url': poster.text,
            'width': self._safe_int(poster.get('width')),
            'height': self._safe_int(poster.get('height')),
        } for poster in metadata.findall('./PosterLinks/PosterIMG') if poster.text]

        # Category elements lacking a ``name`` attribute carry no information.
        categories = [
            name for name in (
                cat.get('name') for cat in metadata.findall('./Categories/Category'))
            if name is not None
        ]

        # A playback link with no URL text cannot be downloaded; skip it
        # instead of emitting a format whose 'url' is None.
        formats = [{
            'url': link.text,
            'ext': 'mp4',
            'vbr': self._safe_int(link.get('bitrate')),
            'width': self._safe_int(link.get('width')),
            'height': self._safe_int(link.get('height')),
        } for link in metadata.findall('./PlaybackLinks/FileURL') if link.text]

        return {
            'id': video_id,
            'title': self._xml_text(metadata, './Title'),
            'description': self._xml_text(metadata, './Description'),
            'thumbnails': thumbnails,
            'duration': self._safe_int(self._xml_text(metadata, './Duration')),
            'categories': categories,
            'formats': formats,
        }