[yt-dlp.git] / youtube_dl / extractor / criterion.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor


class CriterionIE(InfoExtractor):
    """Extractor for film pages on criterion.com.

    Matches URLs of the form ``criterion.com/films/<numeric id>-<slug>`` and
    reads the media locations from the ``so.addVariable(...)`` calls embedded
    in the page markup.
    """

    # The numeric prefix of the film slug is used as the video id.
    _VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
    _TEST = {
        'url': 'http://www.criterion.com/films/184-le-samourai',
        'md5': 'bc51beba55685509883a9a7830919ec3',
        'info_dict': {
            'id': '184',
            'ext': 'mp4',
            'title': 'Le Samouraï',
            'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Download the film page for *url* and build its info dict.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description`` and ``thumbnail``.  The video URL is looked up with
        the helper's default (fatal) behaviour, whereas the thumbnail lookup
        is explicitly non-fatal so that a page without a ``thumbnailURL``
        variable still yields a usable result with ``thumbnail`` unset.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # Without the media URL there is nothing to download, so this
        # lookup keeps the default fatal behaviour.
        final_url = self._search_regex(
            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
        title = self._og_search_title(webpage)
        description = self._html_search_meta('description', webpage)
        # The thumbnail is optional metadata: do not abort the extraction
        # when the page lacks it.
        thumbnail = self._search_regex(
            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
            webpage, 'thumbnail url', fatal=False)

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
Commit	Line	Data
9c51a246	1	# coding: utf-8
4e415288	2	from __future__ import unicode_literals
159736c1	3
159736c1	4	from .common import InfoExtractor
4e415288	5
159736c1 YK	6
159736c1 YK	7	class CriterionIE(InfoExtractor):
92519402	8	_VALID_URL = r'https?://(?:www\.)?criterion\.com/films/(?P<id>[0-9]+)-.+'
2d5a8b55	9	_TEST = {
4e415288 PH	10	'url': 'http://www.criterion.com/films/184-le-samourai',
	11	'md5': 'bc51beba55685509883a9a7830919ec3',
	12	'info_dict': {
	13	'id': '184',
	14	'ext': 'mp4',
	15	'title': 'Le Samouraï',
	16	'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
ec85ded8	17	'thumbnail': r're:^https?://.*\.jpg$',
2d5a8b55 YK	18	}
2d5a8b55 YK	19	}
159736c1 YK	20
159736c1 YK	21	def _real_extract(self, url):
9c51a246	22	video_id = self._match_id(url)
159736c1 YK	23	webpage = self._download_webpage(url, video_id)
159736c1 YK	24
4e415288	25	final_url = self._search_regex(
9c51a246	26	r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
4e415288	27	title = self._og_search_title(webpage)
36bb63fa	28	description = self._html_search_meta('description', webpage)
4e415288	29	thumbnail = self._search_regex(
9c51a246	30	r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
4e415288	31	webpage, 'thumbnail url')
159736c1	32
4e415288 PH	33	return {
	34	'id': video_id,
	35	'url': final_url,
	36	'title': title,
	37	'description': description,
	38	'thumbnail': thumbnail,
	39	}