import itertools

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    smuggle_url,
    str_or_none,
    traverse_obj,
    unified_strdate,
    unsmuggle_url,
)
14 | |
class VoicyBaseIE(InfoExtractor):
    """Shared helpers for Voicy extractors: playlist parsing, per-article
    format extraction, and JSON API access with error handling."""

    def _extract_from_playlist_data(self, value):
        """Build a multi_video info dict from a playlist API payload.

        `value` is the 'Value' object returned by the Voicy API; it is
        expected to carry playlist metadata and a 'VoiceData' list
        (a missing 'VoiceData' key raises KeyError deliberately).
        """
        voice_id = compat_str(value.get('PlaylistId'))
        upload_date = unified_strdate(value.get('Published'), False)
        items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
        return {
            '_type': 'multi_video',
            'entries': items,
            'id': voice_id,
            'title': compat_str(value.get('PlaylistName')),
            'uploader': value.get('SpeakerName'),
            'uploader_id': str_or_none(value.get('SpeakerId')),
            'channel': value.get('ChannelName'),
            'channel_id': str_or_none(value.get('ChannelId')),
            'upload_date': upload_date,
        }

    def _extract_single_article(self, entry):
        """Extract one voice article: an HLS (AAC) and a plain MP3 format."""
        formats = [{
            'url': entry['VoiceHlsFile'],
            'format_id': 'hls',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'protocol': 'm3u8_native',
        }, {
            'url': entry['VoiceFile'],
            'format_id': 'mp3',
            'ext': 'mp3',
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        self._sort_formats(formats)
        return {
            'id': compat_str(entry.get('ArticleId')),
            'title': entry.get('ArticleTitle'),
            'description': entry.get('MediaName'),
            'formats': formats,
        }

    def _call_api(self, url, video_id, **kwargs):
        """Download `url` as JSON and return its 'Value' payload.

        Raises ExtractorError when the API reports a non-zero 'Status',
        preferring the API-supplied error message when one is present.
        """
        response = self._download_json(url, video_id, **kwargs)
        if response.get('Status') != 0:
            message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
            if not message:
                # %s rather than %d: 'Status' may be absent (None) or
                # non-numeric, which %d would turn into a TypeError.
                message = 'There was an error in the response: %s' % response.get('Status')
            raise ExtractorError(message, expected=False)
        return response.get('Value')
63 | ||
64 | ||
class VoicyIE(VoicyBaseIE):
    IE_NAME = 'voicy'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
    ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/122754',
        'info_dict': {
            'id': '122754',
            'title': '1/21(木)声日記:ついに原稿終わった!!',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Extract a single Voicy article page as a multi_video playlist."""
        # _match_valid_url cannot return None for a URL dispatched here,
        # so no assert is needed (asserts are stripped under -O anyway).
        mobj = self._match_valid_url(url)
        voice_id = mobj.group('id')
        channel_id = mobj.group('channel_id')
        # VoicyChannelIE smuggles the already-fetched playlist data into the
        # URL so we can skip a redundant API round-trip.
        url, article_list = unsmuggle_url(url)
        if not article_list:
            article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
        return self._extract_from_playlist_data(article_list)
89 | ||
90 | ||
class VoicyChannelIE(VoicyBaseIE):
    IE_NAME = 'voicy:channel'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
    PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/',
        'info_dict': {
            'id': '7339',
            'title': 'ゆるふわ日常ラジオ #ちょまラジ',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 54,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to VoicyIE for URLs that also carry an article id.
        if VoicyIE.suitable(url):
            return False
        return super().suitable(url)

    def _entries(self, channel_id):
        """Yield raw playlist records for the channel, following pagination."""
        cursor = ''
        for page in itertools.count(1):
            chunk = self._call_api(
                self.PROGRAM_LIST_API_URL % (channel_id, cursor), channel_id,
                note='Paging #%d' % page)
            batch = chunk.get('PlaylistData')
            if not batch:
                return
            yield from batch
            # The API pages by the last item of the previous response.
            tail = batch[-1]
            cursor = '&pid=%d&p_date=%s&play_count=%s' % (
                tail['PlaylistId'], tail['Published'], tail['PlayCount'])

    def _real_extract(self, url):
        """Build a lazy playlist of all articles published on a channel."""
        channel_id = self._match_id(url)
        articles = self._entries(channel_id)

        # Peek at the first record to derive channel-level metadata.
        first = next(articles, None)
        title = traverse_obj(first, ('ChannelName', ), expected_type=compat_str)
        speaker_name = traverse_obj(first, ('SpeakerName', ), expected_type=compat_str)
        if not title:
            if speaker_name:
                title = 'Uploads from %s' % speaker_name
            else:
                title = 'Uploads from channel ID %s' % channel_id

        # Put the consumed record back in front of the stream.
        if first:
            articles = itertools.chain([first], articles)

        entries = (
            self.url_result(
                smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, item['PlaylistId']), item),
                VoicyIE.ie_key())
            for item in articles)
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': channel_id,
            'title': title,
            'channel': speaker_name,
            'channel_id': channel_id,
        }