youtube_dl/extractor/freevideo.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     ExtractorError,
   8 )
   9
  10
  class FreeVideoIE(InfoExtractor):
      """Information extractor for user-uploaded videos on www.freevideo.cz.

      The part of the URL path between ``/vase-videa/`` and ``.html`` is used
      both as the video id and as the title.
      """

      # Dots in the host name are escaped so that only the literal host matches.
      _VALID_URL = r'^http://www\.freevideo\.cz/vase-videa/(?P<videoid>[^.]+)\.html$'

      _TEST = {
          'url': 'http://www.freevideo.cz/vase-videa/vysukany-zadecek-22033.html',
          'file': 'vysukany-zadecek-22033.mp4',
          'info_dict': {
              "title": "vysukany-zadecek-22033",
              "age_limit": 18,
          }
      }

      def _real_extract(self, url):
          """Extract the stream URL and metadata for a single video page.

          Args:
              url: Page URL; must match ``_VALID_URL``.

          Returns:
              A dict with the keys ``id``, ``url``, ``title`` and ``age_limit``.

          Raises:
              ExtractorError: If ``url`` does not match ``_VALID_URL`` or no
                  stream URL can be found in the downloaded page.
          """
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              # The previous message interpolated an undefined name (``query``),
              # which turned this path into a NameError.
              raise ExtractorError('Invalid URL: %s' % url)

          video_id = mobj.group('videoid')

          # Get webpage content
          webpage = self._download_webpage(url, video_id)

          age_limit = self._rta_search(webpage)
          if age_limit == 0:
              # interpret 0 as mis-detection since this site is adult-content only.
              # However, if we get non-0, assume the rtalabel started giving proper
              # results
              age_limit = 18

          # Use a separate name for the match so the ``url`` argument keeps its
          # meaning. Dots are escaped and the path part is non-greedy so the
          # match stops at the first ``/video.mp4"`` on the line.
          video_url_match = re.search(
              r'\s+url: "(http://[a-z0-9-]+\.cdn\.freevideo\.cz/stream/.*?/video\.mp4)"',
              webpage)
          if video_url_match is None:
              raise ExtractorError('Unable to extract video URL')

          return {
              'id': video_id,
              'url': video_url_match.group(1),
              'title': video_id,
              'age_limit': age_limit,
          }