]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/popcorntimes.py
[extractor] Common function `_match_valid_url`
[yt-dlp.git] / yt_dlp / extractor / popcorntimes.py
CommitLineData
7d55b62f
S
1# coding: utf-8
2from __future__ import unicode_literals
3
7d55b62f
S
4
5from .common import InfoExtractor
6from ..compat import (
7 compat_b64decode,
8 compat_chr,
9)
10from ..utils import int_or_none
11
12
class PopcorntimesIE(InfoExtractor):
    """Extractor for movie pages matched by ``_VALID_URL`` on popcorntimes.tv."""

    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
    _TEST = {
        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
        'md5': '93f210991ad94ba8c3485950a2453257',
        'info_dict': {
            'id': 'A1XCFvz',
            'display_id': 'haensel-und-gretel-opera-fantasy',
            'ext': 'mp4',
            'title': 'Hänsel und Gretel',
            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'John Paul',
            'release_date': '19541009',
            'duration': 4260,
            'tbr': 5380,
            'width': 720,
            'height': 540,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the movie page at *url*.

        The media URL is read from the page's ``PCTMLOC`` assignment: its
        letters are rotated by 13 positions, and the result is base64-decoded
        and interpreted as UTF-8. The remaining fields come from the page
        markup and its meta tags.
        """
        match = self._match_valid_url(url)
        video_id = match.group('id')
        display_id = match.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # The <h1> heading is preferred; the meta tag is the mandatory fallback.
        title = self._search_regex(
            r'<h1>([^<]+)', webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'ya:ovs:original_name', webpage, 'title', fatal=True)

        obfuscated_loc = self._search_regex(
            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
            group='value')

        def rotate13(char):
            # ASCII letters move 13 places within their own case, wrapping
            # around the alphabet; every other character is kept as is.
            code = ord(char)
            if ord('A') <= code <= ord('Z'):
                first = ord('A')
            elif ord('a') <= code <= ord('z'):
                first = ord('a')
            else:
                return compat_chr(code)
            return compat_chr(first + (code - first + 13) % 26)

        encoded_loc = ''.join(rotate13(char) for char in obfuscated_loc)
        video_url = compat_b64decode(encoded_loc).decode('utf-8')

        description = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
            'description', fatal=False)

        # Use the preview image when present, otherwise the Open Graph one.
        thumbnail = self._search_regex(
            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'thumbnail', default=None, group='value')
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        creator = self._html_search_meta(
            'video:director', webpage, 'creator', default=None)

        release_date = self._html_search_meta(
            'video:release_date', webpage, default=None)
        if release_date:
            # Strip the dashes from the meta value (e.g. to get YYYYMMDD).
            release_date = release_date.replace('-', '')

        def meta_as_int(name):
            # Missing meta tags yield None instead of raising.
            raw_value = self._html_search_meta(name, webpage, default=None)
            return int_or_none(raw_value)

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'creator': creator,
            'release_date': release_date,
            'duration': meta_as_int('video:duration'),
            'tbr': meta_as_int('ya:ovs:bitrate'),
            'width': meta_as_int('og:video:width'),
            'height': meta_as_int('og:video:height'),
            'http_headers': {
                'Referer': url,
            },
        }