[yt-dlp.git] / yt_dlp / extractor / rottentomatoes.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    float_or_none,
    get_element_by_class,
    join_nonempty,
    traverse_obj,
    url_or_none,
)


class RottenTomatoesIE(InfoExtractor):
    """Extractor for rottentomatoes.com movie pages and their trailers.

    ``_VALID_URL`` accepts three URL shapes:

    * ``/m/<playlist>`` - handled as a playlist,
    * ``/m/<playlist>/trailers`` - handled as a playlist,
    * ``/m/<playlist>/trailers/<id>`` - handled as a single video.
    """

    _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/(?P<playlist>[^/]+)(?:/(?P<tr>trailers)(?:/(?P<id>\w+))?)?'

    _TESTS = [{
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        'info_dict': {
            'id': '11028566',
            'ext': 'mp4',
            'title': 'Toy Story 3',
            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.'
        },
        'skip': 'No longer available',
    }, {
        'url': 'https://www.rottentomatoes.com/m/toy_story_3/trailers/VycaVoBKhGuk',
        'info_dict': {
            'id': 'VycaVoBKhGuk',
            'ext': 'mp4',
            'title': 'Toy Story 3: Trailer 2',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 149.941
        },
    }, {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3',
        'info_dict': {
            'id': 'toy_story_3',
            'title': 'Toy Story 3',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers',
        'info_dict': {
            'id': 'toy_story_3-trailers',
        },
        'playlist_mincount': 5,
    }]

    def _extract_videos(self, data, display_id):
        """Yield one info dict per usable video entry found in *data*.

        An entry is used only when its ``publicId`` and ``file`` values are
        truthy and its ``type`` equals ``'hls'``; every other entry is
        skipped.

        data: the video entries parsed from the page's JSON.
        display_id: identifier handed to the m3u8 format extraction.
        """
        for video in traverse_obj(data, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls')):
            yield {
                # The manifest extraction is requested as non-fatal
                # (fatal=False).
                'formats': self._extract_m3u8_formats(
                    video['file'], display_id, 'mp4', m3u8_id='hls', fatal=False),
                **traverse_obj(video, {
                    'id': 'publicId',
                    'title': 'title',
                    'description': 'description',
                    'duration': ('durationInSeconds', {float_or_none}),
                    'thumbnail': ('image', {url_or_none}),
                }),
            }

    def _real_extract(self, url):
        """Extract a single trailer or a playlist of videos from *url*.

        When the URL carries a video id, the info dict of the entry whose
        ``publicId`` equals that id is returned. Otherwise a playlist of all
        usable entries is returned, titled with the text of the page element
        that has the ``scoreboard__title`` class.

        Raises ExtractorError when a video id is requested but no usable
        entry for it exists in the page data.
        """
        playlist_id, trailers, video_id = self._match_valid_url(url).group('playlist', 'tr', 'id')
        # Yields e.g. 'toy_story_3-trailers' for a ".../trailers" URL (see _TESTS).
        playlist_id = join_nonempty(playlist_id, trailers)
        webpage = self._download_webpage(url, playlist_id)
        # The video list is a JSON array following a <script> tag whose id is
        # "heroVideos" or "videos".
        data = self._search_json(
            r'<script[^>]+\bid=["\'](?:heroV|v)ideos["\'][^>]*>', webpage,
            'data', playlist_id, contains_pattern=r'\[{(?s:.+)}\]')

        if video_id:
            video_data = traverse_obj(data, lambda _, v: v['publicId'] == video_id)
            if not video_data:
                raise ExtractorError('Unable to extract video from webpage')
            # An entry matched by publicId can still be rejected by
            # _extract_videos (falsy 'file' or a non-HLS 'type'), which leaves
            # the generator empty. Pass a default to next() so that case is
            # reported as an ExtractorError rather than a bare StopIteration.
            info = next(self._extract_videos(video_data, video_id), None)
            if info is None:
                raise ExtractorError('Unable to extract video from webpage')
            return info

        return self.playlist_result(
            self._extract_videos(data, playlist_id), playlist_id,
            clean_html(get_element_by_class('scoreboard__title', webpage)))
Commit	Line	Data
dae2a058	1	from .common import InfoExtractor
2d306c03 J	2	from ..utils import (
	3	ExtractorError,
	4	clean_html,
	5	float_or_none,
	6	get_element_by_class,
	7	join_nonempty,
	8	traverse_obj,
	9	url_or_none,
	10	)
4b7b839f JMF	11
4b7b839f JMF	12
dae2a058	13	class RottenTomatoesIE(InfoExtractor):
2d306c03	14	_VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/(?P<playlist>[^/]+)(?:/(?P<tr>trailers)(?:/(?P<id>\w+))?)?'
4b7b839f	15
2d306c03	16	_TESTS = [{
924f47f7	17	'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
924f47f7	18	'info_dict': {
62263851	19	'id': '11028566',
96aded8d	20	'ext': 'mp4',
dae2a058	21	'title': 'Toy Story 3',
2d306c03 J	22	'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.'
	23	},
	24	'skip': 'No longer available',
	25	}, {
	26	'url': 'https://www.rottentomatoes.com/m/toy_story_3/trailers/VycaVoBKhGuk',
	27	'info_dict': {
	28	'id': 'VycaVoBKhGuk',
	29	'ext': 'mp4',
	30	'title': 'Toy Story 3: Trailer 2',
	31	'description': '',
ec85ded8	32	'thumbnail': r're:^https?://.*\.jpg$',
2d306c03 J	33	'duration': 149.941
	34	},
	35	}, {
	36	'url': 'http://www.rottentomatoes.com/m/toy_story_3',
	37	'info_dict': {
	38	'id': 'toy_story_3',
	39	'title': 'Toy Story 3',
	40	},
	41	'playlist_mincount': 4,
	42	}, {
	43	'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers',
	44	'info_dict': {
	45	'id': 'toy_story_3-trailers',
4b7b839f	46	},
2d306c03 J	47	'playlist_mincount': 5,
	48	}]
	49
	50	def _extract_videos(self, data, display_id):
	51	for video in traverse_obj(data, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls')):
	52	yield {
	53	'formats': self._extract_m3u8_formats(
	54	video['file'], display_id, 'mp4', m3u8_id='hls', fatal=False),
	55	**traverse_obj(video, {
	56	'id': 'publicId',
	57	'title': 'title',
	58	'description': 'description',
	59	'duration': ('durationInSeconds', {float_or_none}),
	60	'thumbnail': ('image', {url_or_none}),
	61	}),
	62	}
dae2a058 YCH	63
dae2a058 YCH	64	def _real_extract(self, url):
2d306c03 J	65	playlist_id, trailers, video_id = self._match_valid_url(url).group('playlist', 'tr', 'id')
	66	playlist_id = join_nonempty(playlist_id, trailers)
	67	webpage = self._download_webpage(url, playlist_id)
	68	data = self._search_json(
	69	r'<script[^>]+\bid=["\'](?:heroV\|v)ideos["\'][^>]*>', webpage,
	70	'data', playlist_id, contains_pattern=r'\[{(?s:.+)}\]')
	71
	72	if video_id:
	73	video_data = traverse_obj(data, lambda _, v: v['publicId'] == video_id)
	74	if not video_data:
	75	raise ExtractorError('Unable to extract video from webpage')
	76	return next(self._extract_videos(video_data, video_id))
dae2a058	77
2d306c03 J	78	return self.playlist_result(
	79	self._extract_videos(data, playlist_id), playlist_id,
	80	clean_html(get_element_by_class('scoreboard__title', webpage)))