]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/voicy.py
[test/cookies] Improve logging
[yt-dlp.git] / yt_dlp / extractor / voicy.py
CommitLineData
e040bb0a
THD
1# coding: utf-8
2from __future__ import unicode_literals
3
4from .common import InfoExtractor
5from ..compat import compat_str
6from ..utils import (
7 ExtractorError,
8 smuggle_url,
9 traverse_obj,
10 unsmuggle_url,
11 unified_strdate,
12)
13
e040bb0a
THD
14import itertools
15
16
class VoicyBaseIE(InfoExtractor):
    """Shared helpers for the Voicy extractors.

    Turns the dictionaries returned by the Voicy API into info dicts and
    wraps API calls with a check of the response's ``Status`` field.
    """

    @staticmethod
    def _str_or_none(value):
        """Return ``value`` converted to text, passing ``None`` through.

        A plain ``compat_str(None)`` would produce the literal string
        ``'None'``, which is not meaningful metadata.
        """
        return None if value is None else compat_str(value)

    def _extract_from_playlist_data(self, value):
        """Build a ``multi_video`` result from one playlist dictionary.

        ``value['VoiceData']`` is required (a missing key raises
        ``KeyError``) and is iterated to build the entries; every other key
        is read with ``.get`` and may be absent.
        """
        # NOTE(review): the id is still stringified unconditionally, so a
        # missing 'PlaylistId' yields the id 'None' exactly as before.
        voice_id = compat_str(value.get('PlaylistId'))
        upload_date = unified_strdate(value.get('Published'), False)
        items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
        return {
            '_type': 'multi_video',
            'entries': items,
            'id': voice_id,
            'title': self._str_or_none(value.get('PlaylistName')),
            'uploader': value.get('SpeakerName'),
            'uploader_id': self._str_or_none(value.get('SpeakerId')),
            'channel': value.get('ChannelName'),
            'channel_id': self._str_or_none(value.get('ChannelId')),
            'upload_date': upload_date,
        }

    def _extract_single_article(self, entry):
        """Build the info dict for a single article dictionary.

        ``entry['VoiceHlsFile']`` and ``entry['VoiceFile']`` are required
        (``KeyError`` if missing); they become the HLS and MP3 formats.
        """
        formats = [{
            'url': entry['VoiceHlsFile'],
            'format_id': 'hls',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'protocol': 'm3u8_native',
        }, {
            'url': entry['VoiceFile'],
            'format_id': 'mp3',
            'ext': 'mp3',
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        self._sort_formats(formats)
        return {
            # NOTE(review): as with the playlist id, a missing 'ArticleId'
            # still becomes the string 'None'.
            'id': compat_str(entry.get('ArticleId')),
            'title': entry.get('ArticleTitle'),
            'description': entry.get('MediaName'),
            'formats': formats,
        }

    def _call_api(self, url, video_id, **kwargs):
        """Download a JSON API response and return its ``Value`` member.

        Extra keyword arguments are forwarded to ``self._download_json``.

        Raises ``ExtractorError`` when the response's ``Status`` is not
        ``0``, using the message found under ``Value -> Error -> Message``
        when present and a generic message carrying the status otherwise.
        """
        response = self._download_json(url, video_id, **kwargs)
        status = response.get('Status')
        if status != 0:
            message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
            if not message:
                # %s rather than %d: the status may be missing (None) or
                # non-numeric, and %d would then raise TypeError and mask
                # the actual API failure.
                message = 'There was an error in the response: %s' % (status, )
            raise ExtractorError(message, expected=False)
        return response.get('Value')
65
66
class VoicyIE(VoicyBaseIE):
    """Extractor for a single Voicy playlist page (``/channel/<channel>/<id>``)."""

    IE_NAME = 'voicy'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
    ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/122754',
        'info_dict': {
            'id': '122754',
            'title': '1/21(木)声日記:ついに原稿終わった!!',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Return the ``multi_video`` result for one playlist URL.

        Playlist data smuggled into the URL is used directly; without it the
        article list API is queried with the channel and playlist ids taken
        from the URL.

        Raises ``ExtractorError`` if the URL does not match ``_VALID_URL``.
        """
        # Remove any smuggled payload first so the pattern is matched
        # against the plain URL.
        url, article_list = unsmuggle_url(url)
        mobj = self._match_valid_url(url)
        if not mobj:
            # Explicit check instead of ``assert``, which is stripped when
            # Python runs with -O.
            raise ExtractorError('Invalid URL: %s' % url, expected=True)
        voice_id = mobj.group('id')
        channel_id = mobj.group('channel_id')
        if not article_list:
            article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
        return self._extract_from_playlist_data(article_list)
91
92
class VoicyChannelIE(VoicyBaseIE):
    """Extractor for a Voicy channel page, listing all of its playlists."""

    IE_NAME = 'voicy:channel'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
    PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/',
        'info_dict': {
            'id': '7339',
            'title': 'ゆるふわ日常ラジオ #ちょまラジ',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 54,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept channel URLs only when ``VoicyIE`` does not claim them.

        ``_VALID_URL`` here is a prefix of the single-playlist pattern, so
        playlist URLs must be left to ``VoicyIE``.
        """
        return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)

    def _entries(self, channel_id):
        """Yield every playlist dictionary of the channel, page by page.

        Paging stops at the first response without (or with empty)
        ``PlaylistData``. Each following request carries the id, publish
        date and play count of the last playlist of the previous page.
        """
        pager = ''
        for count in itertools.count(1):
            article_list = self._call_api(
                self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id,
                note='Paging #%d' % count)
            # _call_api returns response.get('Value'), which is None when the
            # response has no 'Value'; treat that as the end of the listing
            # instead of failing with AttributeError.
            playlist_data = (article_list or {}).get('PlaylistData')
            if not playlist_data:
                break
            yield from playlist_data
            last = playlist_data[-1]
            pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])

    def _real_extract(self, url):
        """Return a lazy playlist of ``VoicyIE`` URL results for the channel.

        Only the first playlist is fetched eagerly, to derive the title:
        its ``ChannelName``, else ``Uploads from <SpeakerName>``, else a
        title built from the channel id. Each entry smuggles its playlist
        dictionary into the URL so ``VoicyIE`` need not call the API again.
        """
        channel_id = self._match_id(url)
        articles = self._entries(channel_id)

        first_article = next(articles, None)
        title = traverse_obj(first_article, ('ChannelName', ), expected_type=compat_str)
        speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=compat_str)
        if not title and speaker_name:
            title = 'Uploads from %s' % speaker_name
        if not title:
            title = 'Uploads from channel ID %s' % channel_id

        # Put the peeked item back in front of the remaining generator.
        articles = itertools.chain([first_article], articles) if first_article else articles

        playlist = (
            self.url_result(
                smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value),
                VoicyIE.ie_key())
            for value in articles)
        return {
            '_type': 'playlist',
            'entries': playlist,
            'id': channel_id,
            'title': title,
            # NOTE(review): 'channel' carries the speaker name here, whereas
            # VoicyBaseIE fills 'channel' from 'ChannelName' - confirm which
            # is intended.
            'channel': speaker_name,
            'channel_id': channel_id,
        }
147 }