yt_dlp/extractor/thisvid.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3 import re
   4
   5 from .common import InfoExtractor
   6
   7
   8 class ThisVidIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+/?)'
  10     _TESTS = [{
  11         'url': 'https://thisvid.com/videos/french-boy-pantsed/',
  12         'md5': '3397979512c682f6b85b3b04989df224',
  13         'info_dict': {
  14             'id': '2400174',
  15             'ext': 'mp4',
  16             'title': 'French Boy Pantsed',
  17             'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
  18             'age_limit': 18,
  19         }
  20     }, {
  21         'url': 'https://thisvid.com/embed/2400174/',
  22         'md5': '3397979512c682f6b85b3b04989df224',
  23         'info_dict': {
  24             'id': '2400174',
  25             'ext': 'mp4',
  26             'title': 'French Boy Pantsed',
  27             'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
  28             'age_limit': 18,
  29         }
  30     }]
  31
  32     def _real_extract(self, url):
  33         main_id = self._match_id(url)
  34         webpage = self._download_webpage(url, main_id)
  35
  36         # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
  37         kvs_version = self._html_search_regex(r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">', webpage, 'kvs_version', fatal=False)
  38         if not kvs_version.startswith("5."):
  39             self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")
  40
  41         title = self._html_search_regex(r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>', webpage, 'title')
  42         # video_id, video_url and license_code from the 'flashvars' JSON object:
  43         video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
  44         video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
  45         license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')
  46         thumbnail = self._html_search_regex(r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',", webpage, 'thumbnail', fatal=False)
  47         if thumbnail.startswith("//"):
  48             thumbnail = "https:" + thumbnail
  49         if (re.match(self._VALID_URL, url).group('type') == "videos"):
  50             display_id = main_id
  51         else:
  52             display_id = self._search_regex(r'<link rel="canonical" href="' + self._VALID_URL + r'">', webpage, 'display_id', fatal=False),
  53
  54         return {
  55             'id': video_id,
  56             'display_id': display_id,
  57             'title': title,
  58             'url': getrealurl(video_url, license_code),
  59             'thumbnail': thumbnail,
  60             'age_limit': 18,
  61         }
  62
  63
  64 def getrealurl(video_url, license_code):
  65     urlparts = video_url.split('/')[2:]
  66     license = getlicensetoken(license_code)
  67     newmagic = urlparts[5][:32]
  68
  69     for o in range(len(newmagic) - 1, -1, -1):
  70         new = ""
  71         l = (o + sum([int(n) for n in license[o:]])) % 32
  72
  73         for i in range(0, len(newmagic)):
  74             if i == o:
  75                 new += newmagic[l]
  76             elif i == l:
  77                 new += newmagic[o]
  78             else:
  79                 new += newmagic[i]
  80         newmagic = new
  81
  82     urlparts[5] = newmagic + urlparts[5][32:]
  83     return "/".join(urlparts)
  84
  85
  86 def getlicensetoken(license):
  87     modlicense = license.replace("$", "").replace("0", "1")
  88     center = int(len(modlicense) / 2)
  89     fronthalf = int(modlicense[:center + 1])
  90     backhalf = int(modlicense[center:])
  91
  92     modlicense = str(4 * abs(fronthalf - backhalf))
  93     retval = ""
  94     for o in range(0, center + 1):
  95         for i in range(1, 5):
  96             retval += str((int(license[o + i]) + int(modlicense[o])) % 10)
  97     return retval