]>
Commit | Line | Data |
---|---|---|
6bdb64e2 | 1 | import functools |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from .jwplatform import JWPlatformIE | |
6 | from ..utils import ( | |
7 | ExtractorError, | |
8 | OnDemandPagedList, | |
9 | extract_attributes, | |
10 | get_element_by_class, | |
11 | get_element_html_by_class, | |
12 | ) | |
13 | ||
14 | ||
class HollywoodReporterIE(InfoExtractor):
    """Extractor for single video pages under hollywoodreporter.com/video/.

    The page itself is not parsed for media; the embedded showcase element
    names either a JW Player media id or a YouTube reference, and extraction
    is delegated to the matching extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/video/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/video/chris-pine-michelle-rodriguez-dungeons-dragons-cast-directors-on-what-it-took-to-make-film-sxsw-2023/',
        'info_dict': {
            'id': 'zH4jZaR5',
            'ext': 'mp4',
            'title': 'md5:a9a1c073770a32f178955997712c4bd9',
            'description': 'The cast and directors of \'Dungeons & Dragons: Honor Among Thieves\' talk about their new film.',
            'thumbnail': 'https://cdn.jwplayer.com/v2/media/zH4jZaR5/poster.jpg?width=720',
            'upload_date': '20230312',
            'timestamp': 1678586423,
            'duration': 242.0,
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Download the video page and hand off to the embedded player's extractor.

        Raises ExtractorError when the showcase type is neither 'jwplayer'
        nor 'youtube'.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The link element carries the player type and media reference as
        # data-* attributes; a missing element yields an empty attribute dict.
        card_html = get_element_html_by_class('vlanding-video-card__link', webpage)
        attrs = extract_attributes(card_html or '')
        video_id = attrs['data-video-showcase-trigger']
        showcase_type = attrs['data-video-showcase-type']

        if showcase_type == 'jwplayer':
            return self.url_result(f'jwplatform:{video_id}', JWPlatformIE)
        if showcase_type == 'youtube':
            return self.url_result(video_id, 'Youtube')
        raise ExtractorError(f'Unsupported showcase type "{showcase_type}"')
46 | ||
47 | ||
class HollywoodReporterPlaylistIE(InfoExtractor):
    """Extractor for video category listings under hollywoodreporter.com/vcategory/."""

    _VALID_URL = r'https?://(?:www\.)?hollywoodreporter\.com/vcategory/(?P<slug>[\w-]+)-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.hollywoodreporter.com/vcategory/heat-vision-breakdown-57822/',
        'playlist_mincount': 109,
        'info_dict': {
            'id': '57822',
            'title': 'heat-vision-breakdown',
        },
    }]

    def _fetch_page(self, slug, pl_id, page):
        """Yield one url_result per video link found on a single listing page.

        The incoming page index is incremented by one before it is placed in
        the URL (presumably because the caller counts from zero while the
        site's /page/N/ paths count from one -- confirm against the caller).
        """
        page_number = page + 1
        listing_url = f'https://www.hollywoodreporter.com/vcategory/{slug}-{pl_id}/page/{page_number}/'
        webpage = self._download_webpage(
            listing_url, pl_id, note=f'Downloading playlist page {page_number}')

        # Only links inside the playlist river section are considered; if the
        # section is absent, the empty string produces no matches.
        river_html = get_element_by_class('video-playlist-river', webpage) or ''

        for link in re.finditer(r'<a[^>]+href="([^"]+)"[^>]+class="c-title__link', river_html):
            yield self.url_result(link.group(1), HollywoodReporterIE)

    def _real_extract(self, url):
        """Build a lazily paged playlist using the URL's id as playlist id and slug as title."""
        matched = self._match_valid_url(url)
        slug, pl_id = matched.group('slug', 'id')
        page_fetcher = functools.partial(self._fetch_page, slug, pl_id)
        entries = OnDemandPagedList(page_fetcher, 15)
        return self.playlist_result(entries, pl_id, slug)