[yt-dlp.git] / yt_dlp / extractor / voicy.py

import itertools

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    smuggle_url,
    str_or_none,
    traverse_obj,
    unified_strdate,
    unsmuggle_url,
)


class VoicyBaseIE(InfoExtractor):
    """Helpers shared by the Voicy extractors: API access and result building."""

    def _extract_from_playlist_data(self, value):
        """Build a ``multi_video`` result from one playlist object.

        ``value`` is a dict that must contain a ``VoiceData`` list
        (``KeyError`` otherwise); every element of that list becomes one
        entry via ``_extract_single_article``. All other keys are read with
        ``.get`` and may therefore be absent.
        """
        voice_id = str(value.get('PlaylistId'))
        upload_date = unified_strdate(value.get('Published'), False)
        items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
        return {
            '_type': 'multi_video',
            'entries': items,
            'id': voice_id,
            'title': str(value.get('PlaylistName')),
            'uploader': value.get('SpeakerName'),
            'uploader_id': str_or_none(value.get('SpeakerId')),
            'channel': value.get('ChannelName'),
            'channel_id': str_or_none(value.get('ChannelId')),
            'upload_date': upload_date,
        }

    def _extract_single_article(self, entry):
        """Build the info dict for one element of a playlist's ``VoiceData``.

        Two audio-only formats are emitted, one from ``VoiceHlsFile`` and one
        from ``VoiceFile``; both keys are required (``KeyError`` otherwise).
        """
        formats = [{
            'url': entry['VoiceHlsFile'],
            'format_id': 'hls',
            'ext': 'm4a',
            'acodec': 'aac',
            'vcodec': 'none',
            'protocol': 'm3u8_native',
        }, {
            'url': entry['VoiceFile'],
            'format_id': 'mp3',
            'ext': 'mp3',
            'acodec': 'mp3',
            'vcodec': 'none',
        }]
        return {
            'id': str(entry.get('ArticleId')),
            'title': entry.get('ArticleTitle'),
            'description': entry.get('MediaName'),
            'formats': formats,
        }

    def _call_api(self, url, video_id, **kwargs):
        """Fetch ``url`` as JSON and return the ``Value`` member of the response.

        Extra keyword arguments are forwarded unchanged to ``_download_json``.

        Raises ``ExtractorError`` when the response's ``Status`` is anything
        other than ``0``. The error text is ``Value.Error.Message`` when that
        is a non-empty string, otherwise a generic message with the status.
        """
        response = self._download_json(url, video_id, **kwargs)
        status = response.get('Status')
        if status != 0:
            message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=str)
            if not message:
                # Format the status generically: it can be missing (None) or
                # non-numeric, and '%d' would then raise TypeError and hide
                # the real failure. Integer codes render exactly as before.
                message = f'There was a error in the response: {status}'
            raise ExtractorError(message, expected=False)
        return response.get('Value')


class VoicyIE(VoicyBaseIE):
    """Extractor for a single Voicy playlist URL (``/channel/<channel_id>/<id>``)."""

    _WORKING = False
    IE_NAME = 'voicy'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
    ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/122754',
        'info_dict': {
            'id': '122754',
            'title': '1/21(木)声日記：ついに原稿終わった！！',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 9,
    }]

    def _real_extract(self, url):
        """Return the ``multi_video`` result for the playlist addressed by ``url``.

        Playlist data smuggled into the URL (as ``VoicyChannelIE`` does) is
        used directly; the article-list API is queried only when none is
        present.
        """
        match = self._match_valid_url(url)
        assert match
        channel_id, voice_id = match.group('channel_id', 'id')

        # Only the smuggled payload matters here; the cleaned URL is not used.
        _, playlist_data = unsmuggle_url(url)
        if playlist_data:
            return self._extract_from_playlist_data(playlist_data)

        api_url = self.ARTICLE_LIST_API_URL % (channel_id, voice_id)
        return self._extract_from_playlist_data(self._call_api(api_url, voice_id))


class VoicyChannelIE(VoicyBaseIE):
    """Extractor for a Voicy channel URL (``/channel/<id>``): one entry per playlist."""

    _WORKING = False
    IE_NAME = 'voicy:channel'
    _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
    PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
    _TESTS = [{
        'url': 'https://voicy.jp/channel/1253/',
        'info_dict': {
            'id': '7339',
            'title': 'ゆるふわ日常ラジオ #ちょまラジ',
            'uploader': 'ちょまど@ ITエンジニアなオタク',
            'uploader_id': '7339',
        },
        'playlist_mincount': 54,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if ``VoicyIE`` does not.

        ``_VALID_URL`` here is a prefix of ``VoicyIE._VALID_URL``, so single
        playlist URLs would otherwise match this extractor as well.
        """
        return not VoicyIE.suitable(url) and super().suitable(url)

    def _entries(self, channel_id):
        """Yield every playlist object of the channel, fetching page by page.

        Paging stops at the first response without ``PlaylistData``. The
        cursor for the next request is built from the last item of the
        current page (``PlaylistId``, ``Published``, ``PlayCount``).
        """
        pager = ''
        for count in itertools.count(1):
            article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note=f'Paging #{count}')
            playlist_data = article_list.get('PlaylistData')
            if not playlist_data:
                break
            yield from playlist_data
            last = playlist_data[-1]
            pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])

    def _real_extract(self, url):
        """Return a lazy playlist of ``VoicyIE`` URL results for the channel.

        Channel-level metadata is taken from the first playlist object. Each
        entry URL carries its playlist object as smuggled data.
        """
        channel_id = self._match_id(url)
        articles = self._entries(channel_id)

        # Peek at the first item for the metadata; it is put back below.
        first_article = next(articles, None)
        channel_name = traverse_obj(first_article, ('ChannelName', ), expected_type=str)
        speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=str)
        speaker_id = str_or_none(traverse_obj(first_article, ('SpeakerId', )))

        title = channel_name
        if not title and speaker_name:
            title = f'Uploads from {speaker_name}'
        if not title:
            title = f'Uploads from channel ID {channel_id}'

        articles = itertools.chain([first_article], articles) if first_article else articles

        playlist = (
            self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key())
            for value in articles)
        return {
            '_type': 'playlist',
            'entries': playlist,
            'id': channel_id,
            'title': title,
            # Same mapping as VoicyBaseIE._extract_from_playlist_data:
            # SpeakerName -> uploader, ChannelName -> channel.
            'uploader': speaker_name,
            'uploader_id': speaker_id,
            'channel': channel_name,
            'channel_id': channel_id,
        }
Commit	Line	Data
21633673	1	import itertools
21633673	2
e040bb0a	3	from .common import InfoExtractor
e040bb0a THD	4	from ..utils import (
	5	ExtractorError,
	6	smuggle_url,
b69fd25c	7	str_or_none,
e040bb0a	8	traverse_obj,
e040bb0a	9	unified_strdate,
b69fd25c	10	unsmuggle_url,
e040bb0a THD	11	)
e040bb0a THD	12
e040bb0a THD	13
	14	class VoicyBaseIE(InfoExtractor):
	15	def _extract_from_playlist_data(self, value):
add96eb9	16	voice_id = str(value.get('PlaylistId'))
e040bb0a THD	17	upload_date = unified_strdate(value.get('Published'), False)
	18	items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
	19	return {
	20	'_type': 'multi_video',
	21	'entries': items,
	22	'id': voice_id,
add96eb9	23	'title': str(value.get('PlaylistName')),
e040bb0a	24	'uploader': value.get('SpeakerName'),
b69fd25c	25	'uploader_id': str_or_none(value.get('SpeakerId')),
e040bb0a	26	'channel': value.get('ChannelName'),
b69fd25c	27	'channel_id': str_or_none(value.get('ChannelId')),
e040bb0a THD	28	'upload_date': upload_date,
	29	}
	30
	31	def _extract_single_article(self, entry):
	32	formats = [{
	33	'url': entry['VoiceHlsFile'],
	34	'format_id': 'hls',
	35	'ext': 'm4a',
	36	'acodec': 'aac',
	37	'vcodec': 'none',
	38	'protocol': 'm3u8_native',
	39	}, {
	40	'url': entry['VoiceFile'],
	41	'format_id': 'mp3',
	42	'ext': 'mp3',
	43	'acodec': 'mp3',
	44	'vcodec': 'none',
	45	}]
e040bb0a	46	return {
add96eb9	47	'id': str(entry.get('ArticleId')),
e040bb0a THD	48	'title': entry.get('ArticleTitle'),
	49	'description': entry.get('MediaName'),
	50	'formats': formats,
	51	}
	52
	53	def _call_api(self, url, video_id, **kwargs):
	54	response = self._download_json(url, video_id, **kwargs)
	55	if response.get('Status') != 0:
add96eb9	56	message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=str)
e040bb0a THD	57	if not message:
	58	message = 'There was a error in the response: %d' % response.get('Status')
	59	raise ExtractorError(message, expected=False)
	60	return response.get('Value')
	61
	62
	63	class VoicyIE(VoicyBaseIE):
df773c3d	64	_WORKING = False
e040bb0a THD	65	IE_NAME = 'voicy'
	66	_VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
	67	ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
	68	_TESTS = [{
	69	'url': 'https://voicy.jp/channel/1253/122754',
	70	'info_dict': {
	71	'id': '122754',
	72	'title': '1/21(木)声日記：ついに原稿終わった！！',
	73	'uploader': 'ちょまど@ ITエンジニアなオタク',
	74	'uploader_id': '7339',
	75	},
	76	'playlist_mincount': 9,
	77	}]
	78
	79	def _real_extract(self, url):
5ad28e7f	80	mobj = self._match_valid_url(url)
e040bb0a THD	81	assert mobj
	82	voice_id = mobj.group('id')
	83	channel_id = mobj.group('channel_id')
	84	url, article_list = unsmuggle_url(url)
	85	if not article_list:
	86	article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
	87	return self._extract_from_playlist_data(article_list)
	88
	89
	90	class VoicyChannelIE(VoicyBaseIE):
df773c3d	91	_WORKING = False
e040bb0a THD	92	IE_NAME = 'voicy:channel'
	93	_VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
	94	PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
	95	_TESTS = [{
	96	'url': 'https://voicy.jp/channel/1253/',
	97	'info_dict': {
	98	'id': '7339',
	99	'title': 'ゆるふわ日常ラジオ #ちょまラジ',
	100	'uploader': 'ちょまど@ ITエンジニアなオタク',
	101	'uploader_id': '7339',
	102	},
	103	'playlist_mincount': 54,
	104	}]
	105
	106	@classmethod
	107	def suitable(cls, url):
21633673	108	return not VoicyIE.suitable(url) and super().suitable(url)
e040bb0a THD	109
	110	def _entries(self, channel_id):
	111	pager = ''
	112	for count in itertools.count(1):
add96eb9	113	article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note=f'Paging #{count}')
e040bb0a THD	114	playlist_data = article_list.get('PlaylistData')
	115	if not playlist_data:
	116	break
	117	yield from playlist_data
	118	last = playlist_data[-1]
	119	pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])
	120
	121	def _real_extract(self, url):
	122	channel_id = self._match_id(url)
	123	articles = self._entries(channel_id)
	124
	125	first_article = next(articles, None)
add96eb9	126	title = traverse_obj(first_article, ('ChannelName', ), expected_type=str)
add96eb9	127	speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=str)
e040bb0a	128	if not title and speaker_name:
add96eb9	129	title = f'Uploads from {speaker_name}'
e040bb0a	130	if not title:
add96eb9	131	title = f'Uploads from channel ID {channel_id}'
e040bb0a THD	132
	133	articles = itertools.chain([first_article], articles) if first_article else articles
	134
	135	playlist = (
	136	self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key())
	137	for value in articles)
	138	return {
	139	'_type': 'playlist',
	140	'entries': playlist,
	141	'id': channel_id,
	142	'title': title,
	143	'channel': speaker_name,
	144	'channel_id': channel_id,
	145	}