import itertools

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    smuggle_url,
    str_or_none,
    traverse_obj,
    unified_strdate,
    unsmuggle_url,
)

class VoicyBaseIE(InfoExtractor):
    """Shared helpers for the Voicy extractors: playlist payload parsing and API access."""

    def _extract_from_playlist_data(self, value):
        """Build a multi_video result dict from one playlist entry of the Voicy API.

        `value` is a dict from the API; its 'VoiceData' list (required key)
        holds the individual articles, each converted by _extract_single_article.
        """
        voice_id = compat_str(value.get('PlaylistId'))
        # second argument disables day-first date parsing
        upload_date = unified_strdate(value.get('Published'), False)
        items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
        return {
            '_type': 'multi_video',
            'entries': items,
            'id': voice_id,
            'title': compat_str(value.get('PlaylistName')),
            'uploader': value.get('SpeakerName'),
            'uploader_id': str_or_none(value.get('SpeakerId')),
            'channel': value.get('ChannelName'),
            'channel_id': str_or_none(value.get('ChannelId')),
            'upload_date': upload_date,
        }

    def _extract_single_article(self, entry):
        """Return an info dict (id/title/description/formats) for one article entry.

        Each article exposes an HLS (AAC) and a plain MP3 rendition; both keys
        ('VoiceHlsFile', 'VoiceFile') are required in `entry`.
        """
        formats = [{
            'url': entry['VoiceHlsFile'],
            'format_id': 'hls',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'protocol': 'm3u8_native',
        }, {
            'url': entry['VoiceFile'],
            'format_id': 'mp3',
            'ext': 'mp3',
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        return {
            'id': compat_str(entry.get('ArticleId')),
            'title': entry.get('ArticleTitle'),
            'description': entry.get('MediaName'),
            'formats': formats,
        }

    def _call_api(self, url, video_id, **kwargs):
        """Fetch `url` as JSON and return its 'Value' payload.

        Raises ExtractorError when the API reports a non-zero 'Status',
        preferring the server-supplied message when one is present.
        """
        response = self._download_json(url, video_id, **kwargs)
        if response.get('Status') != 0:
            message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
            if not message:
                # fixed grammar of the fallback error message ("a error" -> "an error")
                message = 'There was an error in the response: %d' % response.get('Status')
            raise ExtractorError(message, expected=False)
        return response.get('Value')
62 | ||
63 | ||
class VoicyIE(VoicyBaseIE):
    IE_NAME = 'voicy'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
    ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/122754',
        'info_dict': {
            'id': '122754',
            'title': '1/21(木)声日記:ついに原稿終わった!!',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Extract a single Voicy playlist page (one dated entry of a channel).

        VoicyChannelIE smuggles the already-fetched article list into the URL
        so the per-entry API call can be skipped; unsmuggle *before* matching
        so the regex runs on the clean URL. (Previously the raw smuggled URL
        was matched and validated with an `assert`, which is stripped under -O.)
        """
        url, article_list = unsmuggle_url(url)
        mobj = self._match_valid_url(url)
        voice_id = mobj.group('id')
        channel_id = mobj.group('channel_id')
        if not article_list:
            article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
        return self._extract_from_playlist_data(article_list)
88 | ||
89 | ||
class VoicyChannelIE(VoicyBaseIE):
    IE_NAME = 'voicy:channel'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
    PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/',
        'info_dict': {
            'id': '7339',
            'title': 'ゆるふわ日常ラジオ #ちょまラジ',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 54,
    }]

    @classmethod
    def suitable(cls, url):
        # Channel URLs that also carry an article id belong to VoicyIE.
        if VoicyIE.suitable(url):
            return False
        return super().suitable(url)

    def _entries(self, channel_id):
        """Yield raw playlist dicts for every page of the channel's program list."""
        pager = ''
        page_num = 0
        while True:
            page_num += 1
            batch = self._call_api(
                self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id,
                note='Paging #%d' % page_num)
            playlists = batch.get('PlaylistData')
            if not playlists:
                return
            yield from playlists
            # The last item of each page seeds the cursor for the next request.
            tail = playlists[-1]
            pager = '&pid=%d&p_date=%s&play_count=%s' % (
                tail['PlaylistId'], tail['Published'], tail['PlayCount'])

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        articles = self._entries(channel_id)

        # Peek at the first article to derive channel-level metadata.
        head = next(articles, None)
        title = traverse_obj(head, ('ChannelName', ), expected_type=compat_str)
        speaker_name = traverse_obj(head, ('SpeakerName', ), expected_type=compat_str)
        if not title:
            title = (
                'Uploads from %s' % speaker_name if speaker_name
                else 'Uploads from channel ID %s' % channel_id)

        # Put the consumed head back in front of the remaining articles.
        if head:
            articles = itertools.chain([head], articles)

        entries = (
            self.url_result(
                smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, item['PlaylistId']), item),
                VoicyIE.ie_key())
            for item in articles)
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': channel_id,
            'title': title,
            'channel': speaker_name,
            'channel_id': channel_id,
        }