# coding: utf-8
from __future__ import unicode_literals

import itertools

from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    smuggle_url,
    str_or_none,
    traverse_obj,
    unified_strdate,
    unsmuggle_url,
)
class VoicyBaseIE(InfoExtractor):
    """Shared helpers for the Voicy extractors: playlist parsing and API access."""

    def _extract_from_playlist_data(self, value):
        """Build a multi_video info dict from one API playlist object.

        `value` is a dict from the Voicy API; it must contain 'VoiceData'
        (a list of article dicts) and may contain 'PlaylistId', 'Published',
        'PlaylistName', 'SpeakerName', 'SpeakerId', 'ChannelName', 'ChannelId'.
        """
        # str_or_none (not compat_str) so a missing id yields None, not 'None'
        voice_id = str_or_none(value.get('PlaylistId'))
        upload_date = unified_strdate(value.get('Published'), False)
        items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
        return {
            '_type': 'multi_video',
            'entries': items,
            'id': voice_id,
            'title': str_or_none(value.get('PlaylistName')),
            'uploader': value.get('SpeakerName'),
            'uploader_id': str_or_none(value.get('SpeakerId')),
            'channel': value.get('ChannelName'),
            'channel_id': str_or_none(value.get('ChannelId')),
            'upload_date': upload_date,
        }

    def _extract_single_article(self, entry):
        """Convert one 'VoiceData' article dict into an info dict.

        Each article exposes an HLS (AAC) rendition and a plain MP3 file;
        both are offered as formats.
        """
        formats = [{
            'url': entry['VoiceHlsFile'],
            'format_id': 'hls',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'protocol': 'm3u8_native',
        }, {
            'url': entry['VoiceFile'],
            'format_id': 'mp3',
            'ext': 'mp3',
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        self._sort_formats(formats)
        return {
            # str_or_none so a missing ArticleId yields None, not 'None'
            'id': str_or_none(entry.get('ArticleId')),
            'title': entry.get('ArticleTitle'),
            'description': entry.get('MediaName'),
            'formats': formats,
        }

    def _call_api(self, url, video_id, **kwargs):
        """Download `url` as JSON and unwrap the Voicy envelope.

        Returns the 'Value' payload; raises ExtractorError when the
        'Status' field signals failure (anything other than 0).
        """
        response = self._download_json(url, video_id, **kwargs)
        if response.get('Status') != 0:
            message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
            if not message:
                # %s (not %d): Status may be absent/None on malformed responses
                message = 'There was an error in the response: %s' % response.get('Status')
            raise ExtractorError(message, expected=False)
        return response.get('Value')
67 | ||
class VoicyIE(VoicyBaseIE):
    IE_NAME = 'voicy'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
    ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/122754',
        'info_dict': {
            'id': '122754',
            'title': '1/21(木)声日記:ついに原稿終わった!!',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        # VoicyChannelIE smuggles the already-fetched playlist data into the
        # URL; unsmuggle first so the regex matches the clean URL, and skip
        # the API request when the data is already available.
        url, article_list = unsmuggle_url(url)
        mobj = self._match_valid_url(url)
        voice_id = mobj.group('id')
        channel_id = mobj.group('channel_id')
        if not article_list:
            article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
        return self._extract_from_playlist_data(article_list)
93 | ||
class VoicyChannelIE(VoicyBaseIE):
    IE_NAME = 'voicy:channel'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
    PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/',
        'info_dict': {
            'id': '7339',
            'title': 'ゆるふわ日常ラジオ #ちょまラジ',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 54,
    }]

    @classmethod
    def suitable(cls, url):
        # Defer to VoicyIE for URLs that also carry an article id.
        if VoicyIE.suitable(url):
            return False
        return super(VoicyChannelIE, cls).suitable(url)

    def _entries(self, channel_id):
        """Yield every playlist dict of the channel, paging through the API."""
        cursor = ''
        for page_num in itertools.count(1):
            page = self._call_api(
                self.PROGRAM_LIST_API_URL % (channel_id, cursor), channel_id,
                note='Paging #%d' % page_num)
            programs = page.get('PlaylistData')
            if not programs:
                return
            yield from programs
            # Build the cursor for the next page from the last entry seen.
            tail = programs[-1]
            cursor = '&pid=%d&p_date=%s&play_count=%s' % (
                tail['PlaylistId'], tail['Published'], tail['PlayCount'])

    def _real_extract(self, url):
        channel_id = self._match_id(url)
        articles = self._entries(channel_id)

        # Peek at the first article to derive channel metadata, then put it
        # back in front of the iterator so no entry is lost.
        first_article = next(articles, None)
        title = traverse_obj(first_article, ('ChannelName', ), expected_type=compat_str)
        speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=compat_str)
        if not title:
            if speaker_name:
                title = 'Uploads from %s' % speaker_name
            else:
                title = 'Uploads from channel ID %s' % channel_id

        if first_article:
            articles = itertools.chain([first_article], articles)

        entries = (
            self.url_result(
                smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value),
                VoicyIE.ie_key())
            for value in articles)
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': channel_id,
            'title': title,
            'channel': speaker_name,
            'channel_id': channel_id,
        }