youtube_dl/extractor/screenwavemedia.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     int_or_none,
   7     unified_strdate,
   8     js_to_json,
   9 )
  10
  11
  12 class ScreenwaveMediaIE(InfoExtractor):
  13     _VALID_URL = r'http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=(?P<id>.+)'
  14
  15     _TESTS = [{
  16         'url': 'http://player.screenwavemedia.com/play/play.php?playerdiv=videoarea&companiondiv=squareAd&id=Cinemassacre-19911',
  17         'only_matching': True,
  18     }]
  19
  20     def _real_extract(self, url):
  21         video_id = self._match_id(url)
  22
  23         playerdata = self._download_webpage(
  24             'http://player.screenwavemedia.com/player.php?id=%s' % video_id,
  25             video_id, 'Downloading player webpage')
  26
  27         vidtitle = self._search_regex(
  28             r'\'vidtitle\'\s*:\s*"([^"]+)"', playerdata, 'vidtitle').replace('\\/', '/')
  29
  30         playerconfig = self._download_webpage(
  31             'http://player.screenwavemedia.com/player.js',
  32             video_id, 'Downloading playerconfig webpage')
  33
  34         videoserver = self._search_regex(r"\[ipaddress\]\s*=>\s*([\d\.]+)", playerdata, 'videoserver')
  35
  36         sources = self._parse_json(
  37             js_to_json(
  38                 self._search_regex(
  39                     r"sources\s*:\s*(\[[^\]]+?\])", playerconfig,
  40                     'sources',
  41                 ).replace(
  42                     "' + thisObj.options.videoserver + '",
  43                     videoserver
  44                 ).replace(
  45                     "' + playerVidId + '",
  46                     video_id
  47                 )
  48             ),
  49             video_id
  50         )
  51
  52         formats = []
  53         for source in sources:
  54             if source['type'] == 'hls':
  55                 formats.extend(self._extract_m3u8_formats(source['file'], video_id))
  56             else:
  57                 format_label = source.get('label')
  58                 height = int_or_none(self._search_regex(
  59                     r'^(\d+)[pP]', format_label, 'height', default=None))
  60                 formats.append({
  61                     'url': source['file'],
  62                     'format': format_label,
  63                     'ext': source.get('type'),
  64                     'height': height,
  65                 })
  66         self._sort_formats(formats)
  67
  68         return {
  69             'id': video_id,
  70             'title': vidtitle,
  71             'formats': formats,
  72         }
  73
  74
  75 class TeamFourIE(InfoExtractor):
  76     _VALID_URL = r'https?://(?:www\.)?teamfourstar\.com/video/(?P<id>[a-z0-9\-]+)/?'
  77     _TEST = {
  78         'url': 'http://teamfourstar.com/video/a-moment-with-tfs-episode-4/',
  79         'info_dict': {
  80             'id': 'TeamFourStar-5292a02f20bfa',
  81             'ext': 'mp4',
  82             'upload_date': '20130401',
  83             'description': 'Check out this and more on our website: http://teamfourstar.com\nTFS Store: http://sharkrobot.com/team-four-star\nFollow on Twitter: http://twitter.com/teamfourstar\nLike on FB: http://facebook.com/teamfourstar',
  84             'title': 'A Moment With TFS Episode 4',
  85         }
  86     }
  87
  88     def _real_extract(self, url):
  89         display_id = self._match_id(url)
  90         webpage = self._download_webpage(url, display_id)
  91
  92         playerdata_url = self._search_regex(
  93             r'src="(http://player\d?\.screenwavemedia\.com/(?:play/)?[a-zA-Z]+\.php\?[^"]*\bid=.+?)"',
  94             webpage, 'player data URL')
  95
  96         video_title = self._html_search_regex(
  97             r'<div class="heroheadingtitle">(?P<title>.+?)</div>',
  98             webpage, 'title')
  99         video_date = unified_strdate(self._html_search_regex(
 100             r'<div class="heroheadingdate">(?P<date>.+?)</div>',
 101             webpage, 'date', fatal=False))
 102         video_description = self._html_search_regex(
 103             r'(?s)<div class="postcontent">(?P<description>.+?)</div>',
 104             webpage, 'description', fatal=False)
 105         video_thumbnail = self._og_search_thumbnail(webpage)
 106
 107         return {
 108             '_type': 'url_transparent',
 109             'display_id': display_id,
 110             'title': video_title,
 111             'description': video_description,
 112             'upload_date': video_date,
 113             'thumbnail': video_thumbnail,
 114             'url': playerdata_url,
 115         }