yt_dlp/extractor/voicy.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_str
   6 from ..utils import (
   7     ExtractorError,
   8     smuggle_url,
   9     str_or_none,
  10     traverse_obj,
  11     unified_strdate,
  12     unsmuggle_url,
  13 )
  14
  15 import itertools
  16
  17
  18 class VoicyBaseIE(InfoExtractor):
  19     def _extract_from_playlist_data(self, value):
  20         voice_id = compat_str(value.get('PlaylistId'))
  21         upload_date = unified_strdate(value.get('Published'), False)
  22         items = [self._extract_single_article(voice_data) for voice_data in value['VoiceData']]
  23         return {
  24             '_type': 'multi_video',
  25             'entries': items,
  26             'id': voice_id,
  27             'title': compat_str(value.get('PlaylistName')),
  28             'uploader': value.get('SpeakerName'),
  29             'uploader_id': str_or_none(value.get('SpeakerId')),
  30             'channel': value.get('ChannelName'),
  31             'channel_id': str_or_none(value.get('ChannelId')),
  32             'upload_date': upload_date,
  33         }
  34
  35     def _extract_single_article(self, entry):
  36         formats = [{
  37             'url': entry['VoiceHlsFile'],
  38             'format_id': 'hls',
  39             'ext': 'm4a',
  40             'acodec': 'aac',
  41             'vcodec': 'none',
  42             'protocol': 'm3u8_native',
  43         }, {
  44             'url': entry['VoiceFile'],
  45             'format_id': 'mp3',
  46             'ext': 'mp3',
  47             'acodec': 'mp3',
  48             'vcodec': 'none',
  49         }]
  50         self._sort_formats(formats)
  51         return {
  52             'id': compat_str(entry.get('ArticleId')),
  53             'title': entry.get('ArticleTitle'),
  54             'description': entry.get('MediaName'),
  55             'formats': formats,
  56         }
  57
  58     def _call_api(self, url, video_id, **kwargs):
  59         response = self._download_json(url, video_id, **kwargs)
  60         if response.get('Status') != 0:
  61             message = traverse_obj(response, ('Value', 'Error', 'Message'), expected_type=compat_str)
  62             if not message:
  63                 message = 'There was a error in the response: %d' % response.get('Status')
  64             raise ExtractorError(message, expected=False)
  65         return response.get('Value')
  66
  67
  68 class VoicyIE(VoicyBaseIE):
  69     IE_NAME = 'voicy'
  70     _VALID_URL = r'https?://voicy\.jp/channel/(?P<channel_id>\d+)/(?P<id>\d+)'
  71     ARTICLE_LIST_API_URL = 'https://vmw.api.voicy.jp/articles_list?channel_id=%s&pid=%s'
  72     _TESTS = [{
  73         'url': 'https://voicy.jp/channel/1253/122754',
  74         'info_dict': {
  75             'id': '122754',
  76             'title': '1/21(木)声日記：ついに原稿終わった！！',
  77             'uploader': 'ちょまど@ ITエンジニアなオタク',
  78             'uploader_id': '7339',
  79         },
  80         'playlist_mincount': 9,
  81     }]
  82
  83     def _real_extract(self, url):
  84         mobj = self._match_valid_url(url)
  85         assert mobj
  86         voice_id = mobj.group('id')
  87         channel_id = mobj.group('channel_id')
  88         url, article_list = unsmuggle_url(url)
  89         if not article_list:
  90             article_list = self._call_api(self.ARTICLE_LIST_API_URL % (channel_id, voice_id), voice_id)
  91         return self._extract_from_playlist_data(article_list)
  92
  93
  94 class VoicyChannelIE(VoicyBaseIE):
  95     IE_NAME = 'voicy:channel'
  96     _VALID_URL = r'https?://voicy\.jp/channel/(?P<id>\d+)'
  97     PROGRAM_LIST_API_URL = 'https://vmw.api.voicy.jp/program_list/all?channel_id=%s&limit=20&public_type=3%s'
  98     _TESTS = [{
  99         'url': 'https://voicy.jp/channel/1253/',
 100         'info_dict': {
 101             'id': '7339',
 102             'title': 'ゆるふわ日常ラジオ #ちょまラジ',
 103             'uploader': 'ちょまど@ ITエンジニアなオタク',
 104             'uploader_id': '7339',
 105         },
 106         'playlist_mincount': 54,
 107     }]
 108
 109     @classmethod
 110     def suitable(cls, url):
 111         return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url)
 112
 113     def _entries(self, channel_id):
 114         pager = ''
 115         for count in itertools.count(1):
 116             article_list = self._call_api(self.PROGRAM_LIST_API_URL % (channel_id, pager), channel_id, note='Paging #%d' % count)
 117             playlist_data = article_list.get('PlaylistData')
 118             if not playlist_data:
 119                 break
 120             yield from playlist_data
 121             last = playlist_data[-1]
 122             pager = '&pid=%d&p_date=%s&play_count=%s' % (last['PlaylistId'], last['Published'], last['PlayCount'])
 123
 124     def _real_extract(self, url):
 125         channel_id = self._match_id(url)
 126         articles = self._entries(channel_id)
 127
 128         first_article = next(articles, None)
 129         title = traverse_obj(first_article, ('ChannelName', ), expected_type=compat_str)
 130         speaker_name = traverse_obj(first_article, ('SpeakerName', ), expected_type=compat_str)
 131         if not title and speaker_name:
 132             title = 'Uploads from %s' % speaker_name
 133         if not title:
 134             title = 'Uploads from channel ID %s' % channel_id
 135
 136         articles = itertools.chain([first_article], articles) if first_article else articles
 137
 138         playlist = (
 139             self.url_result(smuggle_url('https://voicy.jp/channel/%s/%d' % (channel_id, value['PlaylistId']), value), VoicyIE.ie_key())
 140             for value in articles)
 141         return {
 142             '_type': 'playlist',
 143             'entries': playlist,
 144             'id': channel_id,
 145             'title': title,
 146             'channel': speaker_name,
 147             'channel_id': channel_id,
 148         }