yt_dlp/extractor/sport5.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    xpath_text,
)
   3
   4
   5 class Sport5IE(InfoExtractor):
   6     _VALID_URL = r'https?://(?:www|vod)?\.sport5\.co\.il/.*\b(?:Vi|docID)=(?P<id>\d+)'
   7     _TESTS = [
   8         {
   9             'url': 'http://vod.sport5.co.il/?Vc=147&Vi=176331&Page=1',
  10             'info_dict': {
  11                 'id': 's5-Y59xx1-GUh2',
  12                 'ext': 'mp4',
  13                 'title': 'ולנסיה-קורדובה 0:3',
  14                 'description': 'אלקאסר, גאייה ופגולי סידרו לקבוצה של נונו ניצחון על קורדובה ואת המקום הראשון בליגה',
  15                 'duration': 228,
  16                 'categories': list,
  17             },
  18             'skip': 'Blocked outside of Israel',
  19         }, {
  20             'url': 'http://www.sport5.co.il/articles.aspx?FolderID=3075&docID=176372&lang=HE',
  21             'info_dict': {
  22                 'id': 's5-SiXxx1-hKh2',
  23                 'ext': 'mp4',
  24                 'title': 'GOALS_CELTIC_270914.mp4',
  25                 'description': '',
  26                 'duration': 87,
  27                 'categories': list,
  28             },
  29             'skip': 'Blocked outside of Israel',
  30         },
  31     ]
  32
  33     def _real_extract(self, url):
  34         mobj = self._match_valid_url(url)
  35         media_id = mobj.group('id')
  36
  37         webpage = self._download_webpage(url, media_id)
  38
  39         video_id = self._html_search_regex(r'clipId=([\w-]+)', webpage, 'video id')
  40
  41         metadata = self._download_xml(
  42             f'http://sport5-metadata-rr-d.nsacdn.com/vod/vod/{video_id}/HDS/metadata.xml',
  43             video_id)
  44
  45         error = metadata.find('./Error')
  46         if error is not None:
  47             raise ExtractorError(
  48                 '{} returned error: {} - {}'.format(
  49                     self.IE_NAME,
  50                     error.find('./Name').text,
  51                     error.find('./Description').text),
  52                 expected=True)
  53
  54         title = metadata.find('./Title').text
  55         description = metadata.find('./Description').text
  56         duration = int(metadata.find('./Duration').text)
  57
  58         posters_el = metadata.find('./PosterLinks')
  59         thumbnails = [{
  60             'url': thumbnail.text,
  61             'width': int(thumbnail.get('width')),
  62             'height': int(thumbnail.get('height')),
  63         } for thumbnail in posters_el.findall('./PosterIMG')] if posters_el is not None else []
  64
  65         categories_el = metadata.find('./Categories')
  66         categories = [
  67             cat.get('name') for cat in categories_el.findall('./Category')
  68         ] if categories_el is not None else []
  69
  70         formats = [{
  71             'url': fmt.text,
  72             'ext': 'mp4',
  73             'vbr': int(fmt.get('bitrate')),
  74             'width': int(fmt.get('width')),
  75             'height': int(fmt.get('height')),
  76         } for fmt in metadata.findall('./PlaybackLinks/FileURL')]
  77
  78         return {
  79             'id': video_id,
  80             'title': title,
  81             'description': description,
  82             'thumbnails': thumbnails,
  83             'duration': duration,
  84             'categories': categories,
  85             'formats': formats,
  86         }