]>
Commit | Line | Data |
---|---|---|
2f4b5759 H |
1 | from .common import InfoExtractor |
2 | from ..utils import merge_dicts, unified_timestamp, url_or_none | |
3 | from ..utils.traversal import traverse_obj | |
4 | ||
5 | ||
class ZetlandDKArticleIE(InfoExtractor):
    # Matches article URLs on www.zetland.dk. The slug has three dash-separated
    # parts; the whole slug is captured as `id`, the first part as `story_id`
    # and the second part as `uploader_id`.
    _VALID_URL = r'https?://www\.zetland\.dk/\w+/(?P<id>(?P<story_id>\w{8})-(?P<uploader_id>\w{8})-(?:\w{5}))'
    _TESTS = [{
        'url': 'https://www.zetland.dk/historie/sO9aq2MY-a81VP3BY-66e69?utm_source=instagram&utm_medium=linkibio&utm_campaign=artikel',
        'info_dict': {
            'id': 'sO9aq2MY-a81VP3BY-66e69',
            'ext': 'mp3',
            'modified_date': '20240118',
            'title': 'Afsnit 1: “Det føltes som en kidnapning.” ',
            'upload_date': '20240116',
            'uploader_id': 'a81VP3BY',
            'modified_timestamp': 1705568739,
            'release_timestamp': 1705377592,
            'uploader_url': 'https://www.zetland.dk/skribent/a81VP3BY',
            'uploader': 'Helle Fuusager',
            'release_date': '20240116',
            'thumbnail': r're:https://zetland\.imgix\.net/2aafe500-b14e-11ee-bf83-65d5e1283a57/Zetland_Image_1\.jpg',
            'description': 'md5:9619d426772c133f5abb26db27f26a01',
            'timestamp': 1705377592,
            'series_id': '62d54630-e87b-4ab1-a255-8de58dbe1b14',
        },
    }]

    def _real_extract(self, url):
        # Pull the full slug (used as the id) and the uploader part out of the URL.
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')
        uploader_id = mobj.group('uploader_id')

        webpage = self._download_webpage(url, display_id)

        # Everything of interest sits under props.pageProps of the Next.js payload.
        page_props = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
        story = traverse_obj(page_props, ('initialState', 'consume', 'story', 'story'))

        # One audio-only format per valid URL listed under audioFiles.
        formats = [{
            'url': audio_url,
            'vcodec': 'none',
        } for audio_url in traverse_obj(
            story, ('story_content', 'meta', 'audioFiles', ..., {url_or_none}))]

        # Metadata sources, built in the same order they are handed to
        # merge_dicts below.
        # NOTE(review): merge_dicts is imported from ..utils; presumably earlier
        # dicts take precedence over later ones - confirm against its definition.

        # 1. Values known directly from the URL and the format list.
        url_info = {
            'id': display_id,
            'formats': formats,
            'uploader_id': uploader_id
        }

        # 2. Fields read from the story object.
        story_info = traverse_obj(story, {
            'title': ((('story_content', 'content', 'title'), 'title'), {str}),
            'uploader': ('sharer', 'name'),
            'uploader_id': ('sharer', 'sharer_id'),
            'description': ('story_content', 'content', 'socialDescription'),
            'series_id': ('story_content', 'meta', 'seriesId'),
            'release_timestamp': ('published_at', {unified_timestamp}),
            'modified_timestamp': ('revised_at', {unified_timestamp}),
        }, get_all=False)

        # 3. Fields read from pageProps.metaInfo (meta / ld / og sub-objects).
        meta_info = traverse_obj(page_props, ('metaInfo', {
            'title': ((('meta', 'title'), ('ld', 'headline'), ('og', 'og:title'), ('og', 'twitter:title')), {str}),
            'description': ((('meta', 'description'), ('ld', 'description'), ('og', 'og:description'), ('og', 'twitter:description')), {str}),
            'uploader': ((('meta', 'author'), ('ld', 'author', 'name')), {str}),
            'uploader_url': ('ld', 'author', 'url', {url_or_none}),
            'thumbnail': ((('ld', 'image'), ('og', 'og:image'), ('og', 'twitter:image')), {url_or_none}),
            'modified_timestamp': ('ld', 'dateModified', {unified_timestamp}),
            'release_timestamp': ('ld', 'datePublished', {unified_timestamp}),
            'timestamp': ('ld', 'dateCreated', {unified_timestamp}),
        }), get_all=False)

        # 4. <meta> tags searched in the raw HTML.
        published_time = self._html_search_meta(['article:published_time'], webpage)
        html_info = {
            'title': self._html_search_meta(['title', 'og:title', 'twitter:title'], webpage),
            'description': self._html_search_meta(['description', 'og:description', 'twitter:description'], webpage),
            'thumbnail': self._html_search_meta(['og:image', 'twitter:image'], webpage),
            'uploader': self._html_search_meta(['author'], webpage),
            'release_timestamp': unified_timestamp(published_time),
        }

        # 5. JSON-LD embedded in the page; non-fatal if it cannot be found.
        json_ld_info = self._search_json_ld(webpage, display_id, fatal=False)

        return merge_dicts(url_info, story_info, meta_info, html_info, json_ld_info)