[yt-dlp.git] / youtube_dlc / extractor / popcorntimes.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_b64decode,
    compat_chr,
)
from ..utils import int_or_none


class PopcorntimesIE(InfoExtractor):
    """Extractor for popcorntimes.tv movie pages.

    The media URL is taken from a quoted value assigned to ``PCTMLOC`` in
    the page: its ASCII letters are rotated by 13 positions (ROT13) and the
    result is base64-decoded. The other fields are read from the page markup
    and looked up with the ``_html_search_meta`` helper.
    """

    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
    _TEST = {
        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
        'md5': '93f210991ad94ba8c3485950a2453257',
        'info_dict': {
            'id': 'A1XCFvz',
            'display_id': 'haensel-und-gretel-opera-fantasy',
            'ext': 'mp4',
            'title': 'Hänsel und Gretel',
            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'John Paul',
            'release_date': '19541009',
            'duration': 4260,
            'tbr': 5380,
            'width': 720,
            'height': 540,
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the movie page at *url*.

        *url* is expected to match ``_VALID_URL``; its ``id`` and
        ``display_id`` groups are copied into the result unchanged.
        """
        url_match = re.match(self._VALID_URL, url)
        video_id = url_match.group('id')
        display_id = url_match.group('display_id')

        webpage = self._download_webpage(url, display_id)

        # Prefer the first <h1> text; fall back to the meta entry otherwise.
        title = self._search_regex(
            r'<h1>([^<]+)', webpage, 'title', default=None)
        if not title:
            title = self._html_search_meta(
                'ya:ovs:original_name', webpage, 'title', fatal=True)

        encoded_loc = self._search_regex(
            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
            group='value')

        def rot13_code(code):
            # Rotate ASCII letters by 13 within their own case; every other
            # code point is passed through untouched.
            if ord('a') <= code <= ord('z'):
                base = ord('a')
            elif ord('A') <= code <= ord('Z'):
                base = ord('A')
            else:
                return code
            return base + (code - base + 13) % 26

        # Undo the ROT13 layer first, then the base64 layer.
        loc_b64 = ''.join(
            compat_chr(rot13_code(ord(ch))) for ch in encoded_loc)
        video_url = compat_b64decode(loc_b64).decode('utf-8')

        description = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
            'description', fatal=False)

        # Prefer the preview <img>; fall back to the Open Graph thumbnail.
        thumbnail = self._search_regex(
            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'thumbnail', default=None, group='value')
        if not thumbnail:
            thumbnail = self._og_search_thumbnail(webpage)

        creator = self._html_search_meta(
            'video:director', webpage, 'creator', default=None)

        # Strip the dashes from the date when one is present.
        raw_release_date = self._html_search_meta(
            'video:release_date', webpage, default=None)
        release_date = (
            raw_release_date.replace('-', '')
            if raw_release_date else raw_release_date)

        # Integer-valued metadata, looked up in the same order as the keys
        # appear in the returned dict.
        duration = int_or_none(self._html_search_meta(
            'video:duration', webpage, default=None))
        tbr = int_or_none(self._html_search_meta(
            'ya:ovs:bitrate', webpage, default=None))
        width = int_or_none(self._html_search_meta(
            'og:video:width', webpage, default=None))
        height = int_or_none(self._html_search_meta(
            'og:video:height', webpage, default=None))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'creator': creator,
            'release_date': release_date,
            'duration': duration,
            'tbr': tbr,
            'width': width,
            'height': height,
            # The page URL is sent as Referer with the media request.
            'http_headers': {
                'Referer': url,
            },
        }
Commit	Line	Data
7d55b62f S	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import re
	5
	6	from .common import InfoExtractor
	7	from ..compat import (
	8	compat_b64decode,
	9	compat_chr,
	10	)
	11	from ..utils import int_or_none
	12
	13
	14	class PopcorntimesIE(InfoExtractor):
	15	_VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
	16	_TEST = {
	17	'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
	18	'md5': '93f210991ad94ba8c3485950a2453257',
	19	'info_dict': {
	20	'id': 'A1XCFvz',
	21	'display_id': 'haensel-und-gretel-opera-fantasy',
	22	'ext': 'mp4',
	23	'title': 'Hänsel und Gretel',
	24	'description': 'md5:1b8146791726342e7b22ce8125cf6945',
	25	'thumbnail': r're:^https?://.*\.jpg$',
	26	'creator': 'John Paul',
	27	'release_date': '19541009',
	28	'duration': 4260,
	29	'tbr': 5380,
	30	'width': 720,
	31	'height': 540,
	32	},
	33	}
	34
	35	def _real_extract(self, url):
	36	mobj = re.match(self._VALID_URL, url)
	37	video_id, display_id = mobj.group('id', 'display_id')
	38
	39	webpage = self._download_webpage(url, display_id)
	40
	41	title = self._search_regex(
	42	r'<h1>([^<]+)', webpage, 'title',
	43	default=None) or self._html_search_meta(
	44	'ya:ovs:original_name', webpage, 'title', fatal=True)
	45
	46	loc = self._search_regex(
	47	r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
	48	group='value')
	49
	50	loc_b64 = ''
	51	for c in loc:
	52	c_ord = ord(c)
	53	if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
	54	upper = ord('Z') if c_ord <= ord('Z') else ord('z')
	55	c_ord += 13
	56	if upper < c_ord:
	57	c_ord -= 26
	58	loc_b64 += compat_chr(c_ord)
	59
	60	video_url = compat_b64decode(loc_b64).decode('utf-8')
	61
	62	description = self._html_search_regex(
	63	r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
	64	'description', fatal=False)
65
66	thumbnail = self._search_regex(
67	r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
68	webpage, 'thumbnail', default=None,
69	group='value') or self._og_search_thumbnail(webpage)
70
71	creator = self._html_search_meta(
72	'video:director', webpage, 'creator', default=None)
73
74	release_date = self._html_search_meta(
75	'video:release_date', webpage, default=None)
76	if release_date:
77	release_date = release_date.replace('-', '')
78
79	def int_meta(name):
80	return int_or_none(self._html_search_meta(
81	name, webpage, default=None))
82
83	return {
84	'id': video_id,
85	'display_id': display_id,
86	'url': video_url,
87	'title': title,
88	'description': description,
89	'thumbnail': thumbnail,
90	'creator': creator,
91	'release_date': release_date,
92	'duration': int_meta('video:duration'),
93	'tbr': int_meta('ya:ovs:bitrate'),
94	'width': int_meta('og:video:width'),
95	'height': int_meta('og:video:height'),
96	'http_headers': {
97	'Referer': url,
98	},
99	}