yt_dlp/extractor/walla.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     int_or_none,
   6     xpath_text,
   7 )
   8
   9
  10 class WallaIE(InfoExtractor):
  11     _VALID_URL = r'https?://vod\.walla\.co\.il/[^/]+/(?P<id>\d+)/(?P<display_id>.+)'
  12     _TEST = {
  13         'url': 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one',
  14         'info_dict': {
  15             'id': '2642630',
  16             'display_id': 'one-direction-all-for-one',
  17             'ext': 'flv',
  18             'title': 'וואן דיירקשן: ההיסטריה',
  19             'description': 'md5:de9e2512a92442574cdb0913c49bc4d8',
  20             'thumbnail': r're:^https?://.*\.jpg',
  21             'duration': 3600,
  22         },
  23         'params': {
  24             # rtmp download
  25             'skip_download': True,
  26         },
  27     }
  28
  29     _SUBTITLE_LANGS = {
  30         'עברית': 'heb',
  31     }
  32
  33     def _real_extract(self, url):
  34         mobj = self._match_valid_url(url)
  35         video_id = mobj.group('id')
  36         display_id = mobj.group('display_id')
  37
  38         video = self._download_xml(
  39             f'http://video2.walla.co.il/?w=null/null/{video_id}/@@/video/flv_pl',
  40             display_id)
  41
  42         item = video.find('./items/item')
  43
  44         title = xpath_text(item, './title', 'title')
  45         description = xpath_text(item, './synopsis', 'description')
  46         thumbnail = xpath_text(item, './preview_pic', 'thumbnail')
  47         duration = int_or_none(xpath_text(item, './duration', 'duration'))
  48
  49         subtitles = {}
  50         for subtitle in item.findall('./subtitles/subtitle'):
  51             lang = xpath_text(subtitle, './title')
  52             subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
  53                 'ext': 'srt',
  54                 'url': xpath_text(subtitle, './src'),
  55             }]
  56
  57         formats = []
  58         for quality in item.findall('./qualities/quality'):
  59             format_id = xpath_text(quality, './title')
  60             fmt = {
  61                 'url': 'rtmp://wafla.walla.co.il/vod',
  62                 'play_path': xpath_text(quality, './src'),
  63                 'player_url': 'http://isc.walla.co.il/w9/swf/video_swf/vod/WallaMediaPlayerAvod.swf',
  64                 'page_url': url,
  65                 'ext': 'flv',
  66                 'format_id': xpath_text(quality, './title'),
  67             }
  68             m = re.search(r'^(?P<height>\d+)[Pp]', format_id)
  69             if m:
  70                 fmt['height'] = int(m.group('height'))
  71             formats.append(fmt)
  72
  73         return {
  74             'id': video_id,
  75             'display_id': display_id,
  76             'title': title,
  77             'description': description,
  78             'thumbnail': thumbnail,
  79             'duration': duration,
  80             'formats': formats,
  81             'subtitles': subtitles,
  82         }