]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nzonscreen.py
[cleanup, ie] Match both `http` and `https` in `_VALID_URL` (#8968)
[yt-dlp.git] / yt_dlp / extractor / nzonscreen.py
CommitLineData
d3bb187f
GS
1from .common import InfoExtractor
2from ..utils import (
3 float_or_none,
4 int_or_none,
5 remove_end,
6 strip_or_none,
7 traverse_obj,
8 url_or_none,
9)
10
11
class NZOnScreenIE(InfoExtractor):
    """Extractor for title pages on www.nzonscreen.com.

    The title page carries a JSON player configuration in a
    ``data-video-config`` attribute; the metadata and the H.264 rendition
    URLs returned by this extractor are read from that configuration.
    """

    _VALID_URL = r'^https?://www\.nzonscreen\.com/title/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.nzonscreen.com/title/shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
        'info_dict': {
            'id': '726ed6585c6bfb30',
            'ext': 'mp4',
            'format_id': 'hi',
            'display_id': 'shoop-shoop-diddy-wop-cumma-cumma-wang-dang-1982',
            'title': 'Monte Video - "Shoop Shoop, Diddy Wop"',
            'description': 'Monte Video - "Shoop Shoop, Diddy Wop"',
            'alt_title': 'Shoop Shoop Diddy Wop Cumma Cumma Wang Dang | Music Video',
            'thumbnail': r're:https://www\.nzonscreen\.com/content/images/.+\.jpg',
            'duration': 158,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.nzonscreen.com/title/shes-a-mod-1964?collection=best-of-the-60s',
        'info_dict': {
            'id': '3dbe709ff03c36f1',
            'ext': 'mp4',
            'format_id': 'hi',
            'display_id': 'shes-a-mod-1964',
            'title': 'Ray Columbus - \'She\'s A Mod\'',
            'description': 'Ray Columbus - \'She\'s A Mod\'',
            'alt_title': 'She\'s a Mod | Music Video',
            'thumbnail': r're:https://www\.nzonscreen\.com/content/images/.+\.jpg',
            'duration': 130,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.nzonscreen.com/title/puha-and-pakeha-1968/overview',
        'info_dict': {
            'id': 'f86342544385ad8a',
            'ext': 'mp4',
            'format_id': 'hi',
            'display_id': 'puha-and-pakeha-1968',
            'title': 'Looking At New Zealand - Puha and Pakeha',
            'alt_title': 'Looking at New Zealand - \'Pūhā and Pākehā\' | Television',
            'description': 'An excerpt from this television programme.',
            'duration': 212,
            'thumbnail': r're:https://www\.nzonscreen\.com/content/images/.+\.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _extract_formats(self, playlist):
        """Yield a format dict for each H.264 rendition URL in *playlist*.

        *playlist* is the parsed player configuration. The ``lo_res`` and
        ``hi_res`` entries under ``h264`` are looked up and filtered through
        ``url_or_none``. ``quality`` is the index of the rendition among the
        ones found, and width/height are only attached to the ``hi``
        rendition. Nothing is yielded when no usable URL is present.
        """
        # traverse_obj returns None (not an empty dict) when ``h264`` is
        # missing or neither entry is a usable URL. Fall back to {} so that
        # case yields no formats instead of raising AttributeError on
        # ``.items()``.
        renditions = traverse_obj(
            playlist, ('h264', {'lo': 'lo_res', 'hi': 'hi_res'}),
            expected_type=url_or_none) or {}

        for quality, (id_, url) in enumerate(renditions.items()):
            is_hi = id_ == 'hi'
            yield {
                'url': url,
                'format_id': id_,
                'ext': 'mp4',
                'quality': quality,
                # The top-level dimensions are only applied to the "hi" rendition
                'height': int_or_none(playlist.get('height')) if is_hi else None,
                'width': int_or_none(playlist.get('width')) if is_hi else None,
                # ``<id>_res_mb`` is scaled by 1024**2 (presumably MiB -> bytes)
                'filesize_approx': float_or_none(
                    traverse_obj(playlist, ('h264', f'{id_}_res_mb')), invscale=1024**2),
            }

    def _real_extract(self, url):
        """Download the title page and build the info dict from its player config.

        The display id is taken from *url*; the video id is the ``uuid`` field
        of the embedded configuration (a missing ``uuid`` raises ``KeyError``).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The player configuration is JSON stored in a single-quoted attribute
        playlist = self._parse_json(self._html_search_regex(
            r'data-video-config=\'([^\']+)\'', webpage, 'media data'), video_id)

        return {
            'id': playlist['uuid'],
            'display_id': video_id,
            'title': strip_or_none(playlist.get('label')),
            'description': strip_or_none(playlist.get('description')),
            # Page <title>, falling back to og:title, minus the site suffix
            'alt_title': strip_or_none(remove_end(
                self._html_extract_title(webpage, default=None) or self._og_search_title(webpage),
                ' | NZ On Screen')),
            'thumbnail': traverse_obj(playlist, ('thumbnail', 'path')),
            'duration': float_or_none(playlist.get('duration')),
            'formats': list(self._extract_formats(playlist)),
            'http_headers': {
                'Referer': 'https://www.nzonscreen.com/',
                # NOTE(review): a browser-serialized Origin has no trailing
                # slash; kept as-is because the server's handling is not
                # visible here - confirm before changing.
                'Origin': 'https://www.nzonscreen.com/',
            },
        }