]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/thisvid.py
Completely change project name to yt-dlp (#85)
[yt-dlp.git] / yt_dlp / extractor / thisvid.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3 import re
4
5 from .common import InfoExtractor
6
7
class ThisVidIE(InfoExtractor):
    """Extractor for thisvid.com ``/videos/<slug>/`` and ``/embed/<id>/`` pages.

    The page's player configuration carries a scrambled ``video_url`` and a
    ``license_code``; the playable URL is recovered by ``getrealurl``.
    """

    # NOTE(review): the ``id`` group also captures an optional trailing slash,
    # so ids taken from the URL may end in "/" -- confirm whether that is wanted.
    _VALID_URL = r'https?://(?:www\.)?thisvid\.com/(?P<type>videos|embed)/(?P<id>[A-Za-z0-9-]+/?)'
    _TESTS = [{
        'url': 'https://thisvid.com/videos/french-boy-pantsed/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }, {
        'url': 'https://thisvid.com/embed/2400174/',
        'md5': '3397979512c682f6b85b3b04989df224',
        'info_dict': {
            'id': '2400174',
            'ext': 'mp4',
            'title': 'French Boy Pantsed',
            'thumbnail': 'https://media.thisvid.com/contents/videos_screenshots/2400000/2400174/preview.mp4.jpg',
            'age_limit': 18,
        }
    }]

    def _real_extract(self, url):
        """Download the page at *url* and build the info dict for its video.

        Returns a dict with ``id``, ``display_id``, ``title``, ``url``,
        ``thumbnail`` and ``age_limit``. The title, video id, scrambled URL
        and license code are looked up as mandatory fields; the player
        version, thumbnail and (for embed pages) display id are optional and
        may come back as None.
        """
        main_id = self._match_id(url)
        webpage = self._download_webpage(url, main_id)

        # URL decryptor was reversed from version 4.0.4, later verified working with 5.2.0 and may change in the future.
        kvs_version = self._html_search_regex(r'<script [^>]+?src="https://thisvid\.com/player/kt_player\.js\?v=(\d+(\.\d+)+)">', webpage, 'kvs_version', fatal=False)
        # The lookup is non-fatal, so the version may be missing; only warn
        # about a major version change when a version was actually found.
        if kvs_version and not kvs_version.startswith("5."):
            self.report_warning("Major version change (" + kvs_version + ") in player engine--Download may fail.")

        title = self._html_search_regex(r'<title>(?:Video: )?(.+?)(?: - (?:\w+ porn at )?ThisVid(?:.com| tube))?</title>', webpage, 'title')
        # video_id, video_url and license_code from the 'flashvars' JSON object:
        video_id = self._html_search_regex(r"video_id: '([0-9]+)',", webpage, 'video_id')
        video_url = self._html_search_regex(r"video_url: '(function/0/.+?)',", webpage, 'video_url')
        license_code = self._html_search_regex(r"license_code: '([0-9$]{16})',", webpage, 'license_code')
        thumbnail = self._html_search_regex(r"preview_url: '((?:https?:)?//media.thisvid.com/.+?.jpg)',", webpage, 'thumbnail', fatal=False)
        # Thumbnail is optional as well; complete protocol-relative URLs only
        # when there is a URL to complete.
        if thumbnail and thumbnail.startswith("//"):
            thumbnail = "https:" + thumbnail

        if re.match(self._VALID_URL, url).group('type') == "videos":
            display_id = main_id
        else:
            # Embed URLs only carry the numeric id, so take the id from the
            # canonical link instead. _VALID_URL has several groups, so the
            # wanted one must be named explicitly.
            display_id = self._search_regex(
                r'<link rel="canonical" href="' + self._VALID_URL + r'">',
                webpage, 'display_id', group='id', fatal=False)

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'url': getrealurl(video_url, license_code),
            'thumbnail': thumbnail,
            'age_limit': 18,
        }
62
63
def getrealurl(video_url, license_code):
    """Unscramble *video_url* using the token derived from *license_code*.

    The first two '/'-separated parts of *video_url* are dropped (the caller
    passes a value that starts with ``function/0/``). In the remaining parts,
    the first 32 characters of the sixth one are permuted by a series of
    swaps driven by the license token; everything else is left untouched.
    Returns the parts re-joined with '/'.
    """
    segments = video_url.split('/')[2:]
    token = getlicensetoken(license_code)
    scrambled = segments[5]
    chars = list(scrambled[:32])

    # Walk from the last position to the first, exchanging each character
    # with the one at an index computed from the token digits from that
    # position onwards.
    for pos in reversed(range(len(chars))):
        partner = (pos + sum(int(digit) for digit in token[pos:])) % 32
        chars[pos], chars[partner] = chars[partner], chars[pos]

    segments[5] = "".join(chars) + scrambled[32:]
    return "/".join(segments)
84
85
def getlicensetoken(license):
    """Derive the digit string used to unscramble a video URL from *license*.

    Steps:
      1. Strip every '$' from *license* and replace each '0' with '1'.
      2. Split that string around its midpoint into two overlapping halves,
         read both as integers and take four times their absolute difference.
      3. For each of the first ``midpoint + 1`` digits of that number, add it
         (mod 10) to four consecutive characters of the *original* license
         string, starting one past the digit's own index.

    Returns the resulting digits concatenated into one string.
    """
    cleaned = license.replace("$", "").replace("0", "1")
    center = len(cleaned) // 2
    # Both slices include the character at index ``center``.
    gap = abs(int(cleaned[:center + 1]) - int(cleaned[center:]))
    shifts = str(4 * gap)

    return "".join(
        str((int(license[row + step]) + int(shifts[row])) % 10)
        for row in range(center + 1)
        for step in range(1, 5)
    )