yt_dlp/extractor/springboardplatform.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     int_or_none,
   7     xpath_attr,
   8     xpath_text,
   9     xpath_element,
  10     unescapeHTML,
  11     unified_timestamp,
  12 )
  13
  14
  15 class SpringboardPlatformIE(InfoExtractor):
  16     _VALID_URL = r'''(?x)
  17                     https?://
  18                         cms\.springboardplatform\.com/
  19                         (?:
  20                             (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
  21                             xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
  22                         )
  23                     '''
  24     _TESTS = [{
  25         'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
  26         'md5': '5c3cb7b5c55740d482561099e920f192',
  27         'info_dict': {
  28             'id': '981017',
  29             'ext': 'mp4',
  30             'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
  31             'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
  32             'thumbnail': r're:^https?://.*\.jpg$',
  33             'timestamp': 1409132328,
  34             'upload_date': '20140827',
  35             'duration': 193,
  36         },
  37     }, {
  38         'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
  39         'only_matching': True,
  40     }, {
  41         'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
  42         'only_matching': True,
  43     }, {
  44         'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
  45         'only_matching': True,
  46     }]
  47
  48     @staticmethod
  49     def _extract_urls(webpage):
  50         return [
  51             mobj.group('url')
  52             for mobj in re.finditer(
  53                 r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1',
  54                 webpage)]
  55
  56     def _real_extract(self, url):
  57         mobj = self._match_valid_url(url)
  58         video_id = mobj.group('id') or mobj.group('id_2')
  59         index = mobj.group('index') or mobj.group('index_2')
  60
  61         video = self._download_xml(
  62             'http://cms.springboardplatform.com/xml_feeds_advanced/index/%s/rss3/%s'
  63             % (index, video_id), video_id)
  64
  65         item = xpath_element(video, './/item', 'item', fatal=True)
  66
  67         content = xpath_element(
  68             item, './{http://search.yahoo.com/mrss/}content', 'content',
  69             fatal=True)
  70         title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))
  71
  72         video_url = content.attrib['url']
  73
  74         if 'error_video.mp4' in video_url:
  75             raise ExtractorError(
  76                 'Video %s no longer exists' % video_id, expected=True)
  77
  78         duration = int_or_none(content.get('duration'))
  79         tbr = int_or_none(content.get('bitrate'))
  80         filesize = int_or_none(content.get('fileSize'))
  81         width = int_or_none(content.get('width'))
  82         height = int_or_none(content.get('height'))
  83
  84         description = unescapeHTML(xpath_text(
  85             item, './description', 'description'))
  86         thumbnail = xpath_attr(
  87             item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
  88             'thumbnail')
  89
  90         timestamp = unified_timestamp(xpath_text(
  91             item, './{http://cms.springboardplatform.com/namespaces.html}created',
  92             'timestamp'))
  93
  94         formats = [{
  95             'url': video_url,
  96             'format_id': 'http',
  97             'tbr': tbr,
  98             'filesize': filesize,
  99             'width': width,
 100             'height': height,
 101         }]
 102
 103         m3u8_format = formats[0].copy()
 104         m3u8_format.update({
 105             'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
 106             'ext': 'mp4',
 107             'format_id': 'hls',
 108             'protocol': 'm3u8_native',
 109         })
 110         formats.append(m3u8_format)
 111
 112         self._sort_formats(formats)
 113
 114         return {
 115             'id': video_id,
 116             'title': title,
 117             'description': description,
 118             'thumbnail': thumbnail,
 119             'timestamp': timestamp,
 120             'duration': duration,
 121             'formats': formats,
 122         }