youtube_dl/extractor/chilloutzone.py

   1 import re
   2 import base64
   3 import urllib
   4 import json
   5
   6 from .common import InfoExtractor
   7
# File extensions of common video container formats.
# NOTE(review): nothing in the visible part of this file reads this tuple;
# ChilloutzoneIE._real_extract only checks for a '.mp4' suffix -- confirm
# whether this constant is still needed.
video_container = ('.mp4', '.mkv', '.flv')
   9
  10 class ChilloutzoneIE(InfoExtractor):
  11     _VALID_URL = r'(?:https?://)?(?:www\.)?chilloutzone\.net/video/(?P<id>[\w|-]+).html'
  12
  13     _TEST = {
  14         u'url': u'http://www.chilloutzone.net/video/enemene-meck-alle-katzen-weg.html',
  15         u'file': u'18088-enemene-meck-alle-katzen-weg.mp4',
  16         u'md5': u'a76f3457e813ea0037e5244f509e66d1',
  17         u'info_dict': {
  18                 u"id": u"18088",
  19                 u"ext": u"mp4",
  20                 u"title": u"Enemene Meck - Alle Katzen weg"
  21         }
  22         }
  23
  24     def _real_extract(self, url):
  25         mobj = re.match(self._VALID_URL, url)
  26         video_id = mobj.group('id')
  27
  28         webpage_url = 'http://www.chilloutzone.net/video/' + video_id + '.html'
  29
  30         # Log that we are starting to download the page
  31         self.report_download_webpage(webpage_url)
  32         webpage = self._download_webpage(webpage_url, video_id)
  33
  34
  35
  36         # Log that we are starting to parse the page
  37         self.report_extraction(video_id)
  38         # Find base64 decoded file info
  39         base64_video_info = self._html_search_regex(r'var cozVidData = "(.+?)";', webpage, u'video Data')
  40         # decode string and find video file
  41         decoded_video_info = base64.b64decode(base64_video_info)
  42         video_info_dict = json.loads(decoded_video_info)
  43         # get video information from dict
  44         media_url = video_info_dict['mediaUrl']
  45         description = video_info_dict['description']
  46         title = video_info_dict['title']
  47         native_platform = video_info_dict['nativePlatform']
  48         native_video_id = video_info_dict['nativeVideoId']
  49         source_priority = video_info_dict['sourcePriority']
  50
  51
  52         # Start video extraction
  53         video_url = ''
  54         # If nativePlatform is None a fallback mechanism is used (i.e. youtube embed)
  55         if native_platform == None:
  56                 # Look for other video urls
  57                 video_url = self._html_search_regex(r'<iframe.* src="(.+?)".*', webpage, u'fallback Video URL')
  58                 if 'youtube' in video_url:
  59                         self.to_screen(u'Youtube video detected:')
  60                         print video_url
  61                         return self.url_result(video_url, ie='Youtube')
  62
  63         # For debugging purposes
  64         #print video_info_dict
  65         #print native_platform
  66         #print native_video_id
  67         #print source_priority
  68         #print media_url
  69
  70         # Non Fallback: Decide to use native source (e.g. youtube or vimeo) or
  71         # the own CDN
  72         if source_priority == 'native':
  73             if native_platform == 'youtube':
  74                 self.to_screen(u'Youtube video detected:')
  75                 video_url = 'https://www.youtube.com/watch?v=' + native_video_id
  76                 print video_url
  77                 return self.url_result(video_url, ie='Youtube')
  78             if native_platform == 'vimeo':
  79                 self.to_screen(u'Vimeo video detected:')
  80                 video_url = 'http://vimeo.com/' + native_video_id
  81                 print video_url
  82                 return self.url_result(video_url, ie='Vimeo')
  83
  84         # No redirect, use coz media url
  85         video_url = media_url
  86         if video_url.endswith('.mp4') == False:
  87                         self.report_warning(u'Url does not contain a video container')
  88                         return []
  89
  90
  91         return [{
  92                 'id':        video_id,
  93                 'url':       video_url,
  94                 'ext':       'mp4',
  95                 'title':     title,
  96                 'description': description
  97                 }]
  98
  99
 100