[yt-dlp.git] / youtube_dl / extractor / criterion.py

# -*- coding: utf-8 -*-
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class CriterionIE(InfoExtractor):
    """Extractor for film pages on www.criterion.com.

    Handles URLs of the form ``/films/<numeric id>-<slug>``; the numeric
    part is used as the video id.
    """

    _VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
    # Sample URL together with the metadata expected from it.
    _TEST = {
        'url': 'http://www.criterion.com/films/184-le-samourai',
        'md5': 'bc51beba55685509883a9a7830919ec3',
        'info_dict': {
            'id': '184',
            'ext': 'mp4',
            'title': 'Le Samouraï',
            'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
        }
    }

    def _real_extract(self, url):
        """Download the film page at *url* and pull the video info from it.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description`` and ``thumbnail``.  The video URL and the title are
        required; ``description`` and ``thumbnail`` are looked up with
        ``fatal=False`` so that a page lacking either of them does not
        abort the whole extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        # The page configures its player through so.addVariable("name",
        # "value") calls; the dot is escaped so it only matches literally.
        final_url = self._search_regex(
            r'so\.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
        title = self._og_search_title(webpage)

        # Optional metadata: do not fail the extraction when it is missing.
        description = self._html_search_regex(
            r'<meta name="description" content="(.+?)" />',
            webpage, 'video description', fatal=False)
        thumbnail = self._search_regex(
            r'so\.addVariable\("thumbnailURL", "(.+?)"\)\;',
            webpage, 'thumbnail url', fatal=False)

        return {
            'id': video_id,
            'url': final_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
Commit	Line	Data
159736c1	1	# -- coding: utf-8 --
4e415288	2	from __future__ import unicode_literals
159736c1 YK	3
	4	import re
	5
	6	from .common import InfoExtractor
4e415288	7
159736c1 YK	8
159736c1 YK	9	class CriterionIE(InfoExtractor):
4e415288	10	_VALID_URL = r'https?://www\.criterion\.com/films/(?P<id>[0-9]+)-.+'
2d5a8b55	11	_TEST = {
4e415288 PH	12	'url': 'http://www.criterion.com/films/184-le-samourai',
	13	'md5': 'bc51beba55685509883a9a7830919ec3',
	14	'info_dict': {
	15	'id': '184',
	16	'ext': 'mp4',
	17	'title': 'Le Samouraï',
	18	'description': 'md5:a2b4b116326558149bef81f76dcbb93f',
2d5a8b55 YK	19	}
2d5a8b55 YK	20	}
159736c1 YK	21
	22	def _real_extract(self, url):
	23	mobj = re.match(self._VALID_URL, url)
4e415288	24	video_id = mobj.group('id')
159736c1 YK	25	webpage = self._download_webpage(url, video_id)
159736c1 YK	26
4e415288 PH	27	final_url = self._search_regex(
	28	r'so.addVariable\("videoURL", "(.+?)"\)\;', webpage, 'video url')
	29	title = self._og_search_title(webpage)
	30	description = self._html_search_regex(
	31	r'<meta name="description" content="(.+?)" />',
	32	webpage, 'video description')
	33	thumbnail = self._search_regex(
	34	r'so.addVariable\("thumbnailURL", "(.+?)"\)\;',
	35	webpage, 'thumbnail url')
159736c1	36
4e415288 PH	37	return {
	38	'id': video_id,
	39	'url': final_url,
	40	'title': title,
	41	'description': description,
	42	'thumbnail': thumbnail,
	43	}