]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/onenewsnz.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / onenewsnz.py
1 from .brightcove import BrightcoveNewIE
2 from .common import InfoExtractor
3 from ..utils import ExtractorError, traverse_obj
4
5
class OneNewsNZIE(InfoExtractor):
    # Extractor for article pages on 1news.co.nz / onenews.co.nz. An article
    # may embed several videos (Brightcove and/or YouTube), so the result is
    # always a playlist keyed by the article slug.
    IE_NAME = '1News'
    IE_DESC = '1news.co.nz article videos'
    # Three numeric path segments (e.g. 2022/09/29 in the tests below) followed
    # by the article slug, which is captured as the id.
    _VALID_URL = r'https?://(?:www\.)?(?:1|one)news\.co\.nz/\d+/\d+/\d+/(?P<id>[^/?#&]+)'
    _TESTS = [
        {  # Brightcove video
            'url': 'https://www.1news.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
            'info_dict': {
                'id': 'cows-painted-green-on-parliament-lawn-in-climate-protest',
                'title': '\'Cows\' painted green on Parliament lawn in climate protest',
            },
            'playlist': [{
                'info_dict': {
                    'id': '6312993358112',
                    'title': 'Activists dressed as cows painted green outside Parliament in climate protest',
                    'ext': 'mp4',
                    'tags': 'count:6',
                    'uploader_id': '963482464001',
                    'timestamp': 1664416255,
                    'upload_date': '20220929',
                    'duration': 38.272,
                    'thumbnail': r're:^https?://.*\.jpg$',
                    'description': 'Greenpeace accused the Government of "greenwashing" instead of taking climate action.',
                },
            }],
        }, {
            # YouTube video
            'url': 'https://www.1news.co.nz/2022/09/30/now-is-the-time-to-care-about-womens-rugby/',
            'info_dict': {
                'id': 'now-is-the-time-to-care-about-womens-rugby',
                'title': 'Now is the time to care about women\'s rugby',
            },
            'playlist': [{
                'info_dict': {
                    'id': 's4wEB9neTfU',
                    'title': 'Why I love women’s rugby: Black Fern Ruahei Demant',
                    'ext': 'mp4',
                    'channel_follower_count': int,
                    'channel_url': 'https://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
                    'tags': 'count:12',
                    'uploader': 'Re: News',
                    'upload_date': '20211215',
                    'uploader_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
                    'uploader_url': 'http://www.youtube.com/channel/UC2BQ3U9IxoYIJyulv0bN5PQ',
                    'channel_id': 'UC2BQ3U9IxoYIJyulv0bN5PQ',
                    'channel': 'Re: News',
                    'like_count': int,
                    'thumbnail': 'https://i.ytimg.com/vi/s4wEB9neTfU/maxresdefault.jpg',
                    'age_limit': 0,
                    'view_count': int,
                    'categories': ['Sports'],
                    'duration': 222,
                    'description': 'md5:8874410e5740ed1d8fd0df839f849813',
                    'availability': 'public',
                    'playable_in_embed': True,
                    'live_status': 'not_live',
                },
            }],
        }, {
            # 2 Brightcove videos
            'url': 'https://www.1news.co.nz/2022/09/29/raw-videos-capture-hurricane-ians-fury-as-it-slams-florida/',
            'info_dict': {
                'id': 'raw-videos-capture-hurricane-ians-fury-as-it-slams-florida',
                'title': 'Raw videos capture Hurricane Ian\'s fury as it slams Florida',
            },
            'playlist_mincount': 2,
        }, {
            'url': 'https://www.onenews.co.nz/2022/09/29/cows-painted-green-on-parliament-lawn-in-climate-protest/',
            'only_matching': True,
        }]

    # Filled with (account id, video id) to build a Brightcove player URL that
    # is then delegated to BrightcoveNewIE.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/0xpHIR6IB_default/index.html?videoId=%s'

    def _real_extract(self, url):
        """Build a playlist of the videos embedded in a 1News article.

        The article's ``Fusion.globalContent`` JSON is read from the page and
        its ``content_elements`` are scanned for items whose ``subtype`` is
        ``'video'`` (Brightcove) or ``'youtube'``. Items of either kind that
        carry no usable video id are skipped.

        Raises:
            ExtractorError: (expected) when no video entry could be collected.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        fusion_metadata = self._search_json(r'Fusion\.globalContent\s*=', webpage, 'fusion metadata', display_id)

        entries = []
        for item in traverse_obj(fusion_metadata, 'content_elements') or []:
            item_type = traverse_obj(item, 'subtype')
            if item_type == 'video':
                brightcove_config = traverse_obj(item, ('embed', 'config'))
                brightcove_video_id = traverse_obj(brightcove_config, 'brightcoveVideoId')
                if not brightcove_video_id:
                    # Without an id the template would yield '...videoId=None',
                    # a bogus entry that also masks the "no video" error below.
                    continue
                brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
                    # Fallback account id; it is the uploader_id expected by
                    # the first Brightcove test above.
                    traverse_obj(brightcove_config, 'brightcoveAccount') or '963482464001',
                    brightcove_video_id,
                )
                entries.append(self.url_result(brightcove_url, BrightcoveNewIE))
            elif item_type == 'youtube':
                # Try the referent id first, then the oEmbed id.
                video_id_or_url = traverse_obj(item, ('referent', 'id'), ('raw_oembed', '_id'))
                if video_id_or_url:
                    entries.append(self.url_result(video_id_or_url, ie='Youtube'))

        if not entries:
            raise ExtractorError('This article does not have a video.', expected=True)

        # Prefer the headline from the metadata; otherwise derive a title
        # from the webpage itself.
        playlist_title = (
            traverse_obj(fusion_metadata, ('headlines', 'basic'))
            or self._generic_title('', webpage)
        )
        return self.playlist_result(entries, display_id, playlist_title)