[yt-dlp.git] / yt_dlp / extractor / popcorntimes.py

# coding: utf-8
from __future__ import unicode_literals


from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_chr,
)
from ..utils import int_or_none


class PopcorntimesIE(InfoExtractor):
    """Information extractor for movie pages on popcorntimes.tv."""

    # Captures the video id and the display id (slug) that follows it.
    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
    _TEST = {
        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
        'md5': '93f210991ad94ba8c3485950a2453257',
        'info_dict': {
            'id': 'A1XCFvz',
            'display_id': 'haensel-und-gretel-opera-fantasy',
            'ext': 'mp4',
            'title': 'Hänsel und Gretel',
            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'John Paul',
            'release_date': '19541009',
            'duration': 4260,
            'tbr': 5380,
            'width': 720,
            'height': 540,
        },
    }

    def _real_extract(self, url):
        """Download the movie page and build the info dict for it.

        The media URL is read from the page's ``PCTMLOC`` assignment, whose
        value is ROT13-shifted base64; everything else comes from the page
        markup and its meta tags.
        """
        video_id, display_id = self._match_valid_url(url).group(
            'id', 'display_id')
        webpage = self._download_webpage(url, display_id)

        # Prefer the first <h1>; otherwise the meta tag is mandatory.
        title = self._search_regex(
            r'<h1>([^<]+)', webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'ya:ovs:original_name', webpage, 'title', fatal=True)

        obfuscated = self._search_regex(
            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
            group='value')

        def rot13(code_point):
            # Rotate ASCII letters by 13 places within their own case;
            # every other code point is passed through untouched.
            if ord('a') <= code_point <= ord('z'):
                first = ord('a')
            elif ord('A') <= code_point <= ord('Z'):
                first = ord('A')
            else:
                return code_point
            return first + (code_point - first + 13) % 26

        encoded = ''.join(compat_chr(rot13(ord(ch))) for ch in obfuscated)
        video_url = compat_b64decode(encoded).decode('utf-8')

        description = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
            'description', fatal=False)

        # Use the preview image when present, else the Open Graph thumbnail.
        thumbnail = self._search_regex(
            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'thumbnail', default=None,
            group='value')
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        creator = self._html_search_meta(
            'video:director', webpage, 'creator', default=None)

        # Dashes are dropped from the meta value to form the release date.
        raw_date = self._html_search_meta(
            'video:release_date', webpage, default=None)
        release_date = raw_date.replace('-', '') if raw_date else raw_date

        info = {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'creator': creator,
            'release_date': release_date,
        }

        # Integer fields, each read from the meta tag named alongside it.
        numeric_sources = (
            ('duration', 'video:duration'),
            ('tbr', 'ya:ovs:bitrate'),
            ('width', 'og:video:width'),
            ('height', 'og:video:height'),
        )
        for field, meta_name in numeric_sources:
            info[field] = int_or_none(self._html_search_meta(
                meta_name, webpage, default=None))

        # The page URL is passed along as the Referer for the media request.
        info['http_headers'] = {
            'Referer': url,
        }
        return info
Commit	Line	Data
7d55b62f S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
7d55b62f S	4
	5	from .common import InfoExtractor
	6	from ..compat import (
	7	compat_b64decode,
	8	compat_chr,
	9	)
	10	from ..utils import int_or_none
	11
	12
	13	class PopcorntimesIE(InfoExtractor):
	14	_VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
	15	_TEST = {
	16	'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
	17	'md5': '93f210991ad94ba8c3485950a2453257',
	18	'info_dict': {
	19	'id': 'A1XCFvz',
	20	'display_id': 'haensel-und-gretel-opera-fantasy',
	21	'ext': 'mp4',
	22	'title': 'Hänsel und Gretel',
	23	'description': 'md5:1b8146791726342e7b22ce8125cf6945',
	24	'thumbnail': r're:^https?://.*\.jpg$',
	25	'creator': 'John Paul',
	26	'release_date': '19541009',
	27	'duration': 4260,
	28	'tbr': 5380,
	29	'width': 720,
	30	'height': 540,
	31	},
	32	}
	33
	34	def _real_extract(self, url):
5ad28e7f	35	mobj = self._match_valid_url(url)
7d55b62f S	36	video_id, display_id = mobj.group('id', 'display_id')
	37
	38	webpage = self._download_webpage(url, display_id)
	39
	40	title = self._search_regex(
	41	r'<h1>([^<]+)', webpage, 'title',
	42	default=None) or self._html_search_meta(
	43	'ya:ovs:original_name', webpage, 'title', fatal=True)
	44
	45	loc = self._search_regex(
	46	r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
	47	group='value')
	48
	49	loc_b64 = ''
	50	for c in loc:
	51	c_ord = ord(c)
	52	if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
	53	upper = ord('Z') if c_ord <= ord('Z') else ord('z')
	54	c_ord += 13
	55	if upper < c_ord:
	56	c_ord -= 26
	57	loc_b64 += compat_chr(c_ord)
	58
	59	video_url = compat_b64decode(loc_b64).decode('utf-8')
	60
	61	description = self._html_search_regex(
	62	r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
	63	'description', fatal=False)
	64
	65	thumbnail = self._search_regex(
	66	r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
	67	webpage, 'thumbnail', default=None,
	68	group='value') or self._og_search_thumbnail(webpage)
	69
	70	creator = self._html_search_meta(
	71	'video:director', webpage, 'creator', default=None)
	72
	73	release_date = self._html_search_meta(
	74	'video:release_date', webpage, default=None)
	75	if release_date:
	76	release_date = release_date.replace('-', '')
	77
	78	def int_meta(name):
	79	return int_or_none(self._html_search_meta(
	80	name, webpage, default=None))
	81
	82	return {
	83	'id': video_id,
	84	'display_id': display_id,
	85	'url': video_url,
	86	'title': title,
	87	'description': description,
	88	'thumbnail': thumbnail,
	89	'creator': creator,
	90	'release_date': release_date,
	91	'duration': int_meta('video:duration'),
	92	'tbr': int_meta('ya:ovs:bitrate'),
	93	'width': int_meta('og:video:width'),
	94	'height': int_meta('og:video:height'),
	95	'http_headers': {
	96	'Referer': url,
	97	},
	98	}