]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/voicy.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / voicy.py
CommitLineData
21633673 1import itertools
2
e040bb0a 3from .common import InfoExtractor
e040bb0a
THD
4from ..utils import (
5 ExtractorError,
6 smuggle_url,
b69fd25c 7 str_or_none,
e040bb0a 8 traverse_obj,
e040bb0a 9 unified_strdate,
b69fd25c 10 unsmuggle_url,
e040bb0a
THD
11)
12
e040bb0a
THD
13
class VoicyBaseIE(InfoExtractor):
    """Shared helpers for the Voicy extractors: API access and result building."""

    def _extract_from_playlist_data(self, value):
        """Build a ``multi_video`` result from a single playlist dict.

        ``value`` must contain ``VoiceData`` (an iterable of article dicts);
        every other key is read with ``.get`` and may be absent.
        """
        voice_id = str(value.get('PlaylistId'))
        upload_date = unified_strdate(value.get('Published'), False)
        items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
        return {
            '_type': 'multi_video',
            'entries': items,
            'id': voice_id,
            # str_or_none keeps a missing name as None instead of the literal
            # string 'None', matching how uploader_id/channel_id are handled
            'title': str_or_none(value.get('PlaylistName')),
            'uploader': value.get('SpeakerName'),
            'uploader_id': str_or_none(value.get('SpeakerId')),
            'channel': value.get('ChannelName'),
            'channel_id': str_or_none(value.get('ChannelId')),
            'upload_date': upload_date,
        }

    def _extract_single_article(self, entry):
        """Build the info dict for one article.

        ``entry`` must contain ``VoiceHlsFile`` and ``VoiceFile``; two audio
        formats (HLS/AAC and plain MP3) are produced from them.
        """
        formats = [{
            'url': entry['VoiceHlsFile'],
            'format_id': 'hls',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'protocol': 'm3u8_native',
        }, {
            'url': entry['VoiceFile'],
            'format_id': 'mp3',
            'ext': 'mp3',
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        return {
            'id': str(entry.get('ArticleId')),
            'title': entry.get('ArticleTitle'),
            'description': entry.get('MediaName'),
            'formats': formats,
        }

    def _call_api(self, url, video_id, **kwargs):
        """Download a JSON API response and return its ``Value`` member.

        Extra keyword arguments are forwarded to ``_download_json``.

        Raises ``ExtractorError`` when the response's ``Status`` is not 0,
        using ``Value.Error.Message`` as the message when it is a string.
        """
        response = self._download_json(url, video_id, **kwargs)
        status = response.get('Status')
        if status != 0:
            message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=str)
            if not message:
                # Status can be missing (None) or non-numeric here; '%d' would
                # raise TypeError and hide the real failure
                message = f'There was a error in the response: {status}'
            raise ExtractorError(message, expected=False)
        return response.get('Value')
61
62
class VoicyIE(VoicyBaseIE):
    """Extractor for a single Voicy playlist URL (``/channel/<channel_id>/<id>``)."""

    _WORKING = False
    IE_NAME = 'voicy'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
    ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/122754',
        'info_dict': {
            'id': '122754',
            'title': '1/21(木)声日記:ついに原稿終わった!!',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Return the playlist result, preferring data smuggled into the URL."""
        match = self._match_valid_url(url)
        assert match
        channel_id, voice_id = match.group('channel_id', 'id')

        # Playlist data may already be attached to the URL (VoicyChannelIE
        # smuggles it); only hit the API when it is not.
        _, smuggled_data = unsmuggle_url(url)
        if smuggled_data:
            return self._extract_from_playlist_data(smuggled_data)

        api_url = self.ARTICLE_LIST_API_URL % (channel_id, voice_id)
        return self._extract_from_playlist_data(self._call_api(api_url, voice_id))
88
89
class VoicyChannelIE(VoicyBaseIE):
    """Extractor for a Voicy channel URL, yielding one entry per playlist."""

    _WORKING = False
    IE_NAME = 'voicy:channel'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
    PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/',
        'info_dict': {
            'id': '7339',
            'title': 'ゆるふわ日常ラジオ #ちょまラジ',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 54,
    }]

    @classmethod
    def suitable(cls, url):
        """Match channel URLs only when VoicyIE does not claim them."""
        if VoicyIE.suitable(url):
            return False
        return super().suitable(url)

    def _entries(self, channel_id):
        """Yield playlist dicts page by page until a page has no ``PlaylistData``."""
        pager = ''
        page_number = 1
        while True:
            page = self._call_api(
                self.PROGRAM_LIST_API_URL % (channel_id, pager),
                channel_id, note=f'Paging #{page_number}')
            playlist_data = page.get('PlaylistData')
            if not playlist_data:
                return
            yield from playlist_data
            # The next request is keyed on the last item of the current page
            tail = playlist_data[-1]
            pager = '&pid=%d&p_date=%s&play_count=%s' % (tail['PlaylistId'], tail['Published'], tail['PlayCount'])
            page_number += 1

    def _real_extract(self, url):
        """Return a lazy playlist whose entries are delegated to VoicyIE."""
        channel_id = self._match_id(url)
        articles = self._entries(channel_id)

        # Peek at the first item to derive the playlist title
        first_article = next(articles, None)
        channel_name = traverse_obj(first_article, ('ChannelName', ), expected_type=str)
        speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=str)
        if channel_name:
            title = channel_name
        elif speaker_name:
            title = f'Uploads from {speaker_name}'
        else:
            title = f'Uploads from channel ID {channel_id}'

        # Put the peeked item back in front of the remaining ones
        if first_article:
            articles = itertools.chain([first_article], articles)

        # Each playlist dict is smuggled into its URL so VoicyIE can reuse it
        entries = (
            self.url_result(
                smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, item['PlaylistId']), item),
                VoicyIE.ie_key())
            for item in articles)
        return {
            '_type': 'playlist',
            'entries': entries,
            'id': channel_id,
            'title': title,
            'channel': speaker_name,
            'channel_id': channel_id,
        }