]>
Commit | Line | Data |
---|---|---|
28b8f57b H |
1 | from .common import InfoExtractor |
2 | from ..utils import ( | |
3 | clean_html, | |
4 | determine_ext, | |
5 | int_or_none, | |
6 | parse_iso8601, | |
7 | traverse_obj, | |
8 | variadic, | |
9 | ) | |
10 | ||
11 | ||
class NoicePodcastIE(InfoExtractor):
    """Extractor for content pages matching ``open.noice.id/content/<id>``."""

    _VALID_URL = r'https?://open\.noice\.id/content/(?P<id>[a-fA-F0-9-]+)'
    _TESTS = [
        {
            'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2',
            'info_dict': {
                'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2',
                'ext': 'm4a',
                'season': 'Season 1',
                'description': 'md5:58d1274e6857b6fbbecf47075885380d',
                'release_date': '20221115',
                'timestamp': 1668496642,
                'season_number': 1,
                'upload_date': '20221115',
                'release_timestamp': 1668496642,
                'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! (bersama Wishnutama)',
                'modified_date': '20221121',
                'categories': ['Bisnis dan Keuangan'],
                'duration': 3567,
                'modified_timestamp': 1669030647,
                'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560',
                'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832',
                'like_count': int,
                'channel': 'Noice Space Talks',
                'comment_count': int,
                'dislike_count': int,
                'channel_follower_count': int,
            },
        },
        {
            'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063',
            'info_dict': {
                'id': '222134e4-99f2-456f-b8a2-b8be404bf063',
                'ext': 'm4a',
                'release_timestamp': 1653488220,
                'description': 'md5:35074f6190cef52b05dd133bb2ef460e',
                'upload_date': '20220525',
                'timestamp': 1653460637,
                'release_date': '20220525',
                'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625',
                'title': 'Eps 1: Dijodohin Sama Anak Pak RT',
                'modified_timestamp': 1669030647,
                'season_number': 1,
                'modified_date': '20221121',
                'categories': ['Cerita dan Drama'],
                'duration': 1830,
                'season': 'Season 1',
                'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74',
                'dislike_count': int,
                'like_count': int,
                'comment_count': int,
                'channel': 'Dear Jerome',
                'channel_follower_count': int,
            },
        },
    ]

    def _get_formats_and_subtitles(self, media_url, video_id):
        """Collect formats and subtitles for one or more media URLs.

        ``media_url`` may be a single URL or an iterable of URLs. A URL whose
        detected extension is ``m3u8`` is expanded through the m3u8 helper and
        its subtitles are merged into the result; every other URL is added as
        a single audio-only entry labelled ``mp3``.

        Returns a ``(formats, subtitles)`` tuple.
        """
        formats = []
        subtitles = {}
        for source_url in variadic(media_url):
            if determine_ext(source_url) != 'm3u8':
                # Not a playlist: register the URL directly as an audio-only format.
                formats.append({
                    'url': source_url,
                    'ext': 'mp3',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                })
                continue

            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(source_url, video_id)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)
        return formats, subtitles

    def _real_extract(self, url):
        """Build the info dict from the page's Next.js ``contentDetails`` data.

        Title, description, thumbnail and modification time fall back to the
        page's HTML meta tags when the corresponding JSON field is empty.
        """
        content_id = self._match_id(url)
        webpage = self._download_webpage(url, content_id)

        page_props = self._search_nextjs_data(webpage, content_id)['props']['pageProps']
        details = page_props['contentDetails']

        # Both keys are looked up; every value that is present becomes a candidate URL.
        media_urls = traverse_obj(details, (('rawContentUrl', 'url'), ))
        formats, subtitles = self._get_formats_and_subtitles(media_urls, content_id)

        # The meta-tag fallbacks are resolved in the same order as the fields
        # appear in the returned dict.
        title = details.get('title') or self._html_search_meta('og:title', webpage)

        description = details.get('description')
        if not description:
            description = clean_html(details.get('htmlDescription'))
        if not description:
            description = self._html_search_meta(['description', 'og:description'], webpage)

        thumbnail = details.get('image') or self._html_search_meta('og:image', webpage)
        modified_at = details.get('updatedAt') or self._html_search_meta('og:updated_time', webpage)

        info = {
            'id': details.get('id') or content_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': parse_iso8601(details.get('createdAt')),
            'release_timestamp': parse_iso8601(details.get('publishedAt')),
            'modified_timestamp': parse_iso8601(modified_at),
            'duration': int_or_none(details.get('duration')),
            'categories': traverse_obj(details, ('genres', ..., 'name')),
            'season': details.get('seasonName'),
            'season_number': int_or_none(details.get('seasonNumber')),
            'channel': traverse_obj(details, ('catalog', 'title')),
            'channel_id': traverse_obj(details, ('catalog', 'id'), 'catalogId'),
        }
        # Engagement counters are read from the nested ``meta.aggregations`` object.
        return {
            **info,
            **traverse_obj(details, ('meta', 'aggregations', {
                'like_count': 'likes',
                'dislike_count': 'dislikes',
                'comment_count': 'comments',
                'channel_follower_count': 'followers',
            })),
        }