jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/damtomo.py
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409)
[yt-dlp.git] / yt_dlp / extractor / damtomo.py
1 import re
2
3 from .common import InfoExtractor
4 from ..compat import compat_str
5 from ..utils import ExtractorError, clean_html, int_or_none, try_get, unified_strdate
6
7
class DamtomoBaseIE(InfoExtractor):
    """Common extraction logic shared by the clubdam.com damtomo extractors.

    Subclasses are expected to define ``_VALID_URL``, ``_WEBPAGE_URL_TMPL``,
    ``_DKML_XML_URL`` and ``_DKML_XML_NS``; this class only reads them.
    """

    def _real_extract(self, url):
        """Extract metadata and HLS formats for a single damtomo item.

        Downloads the item's web page (Shift_JIS) for metadata and a separate
        XML document for the streaming URL.

        Raises:
            ExtractorError: when the site redirects to its rate-limit page,
                when the page carries the server-side error banner, or when
                no m3u8 URL can be found in the stream XML.
        """
        video_id = self._match_id(url)
        webpage, handle = self._download_webpage_handle(
            self._WEBPAGE_URL_TMPL % video_id, video_id, encoding='sjis')

        if handle.url == 'https://www.clubdam.com/sorry/':
            raise ExtractorError('You are rate-limited. Try again later.', expected=True)
        if '<h2>予期せぬエラーが発生しました。</h2>' in webpage:
            raise ExtractorError('There is an error on server-side. Try again later.', expected=True)

        description = self._search_regex(
            r'(?m)<div id="public_comment">\s*<p>\s*([^<]*?)\s*</p>',
            webpage, 'description', default=None)
        uploader_id = self._search_regex(
            r'<a href="https://www\.clubdam\.com/app/damtomo/member/info/Profile\.do\?damtomoId=([^"]+)"',
            webpage, 'uploader_id', default=None)

        # Map every single-class <p>/<div> element to its whitespace-collapsed text
        data_dict = {
            mobj.group('class'): re.sub(r'\s+', ' ', clean_html(mobj.group('value')))
            for mobj in re.finditer(r'(?s)<(p|div)\s+class="(?P<class>[^" ]+?)">(?P<value>.+?)</\1>', webpage)}

        # All of the fields below are optional metadata: a missing element must
        # yield None for that field instead of aborting extraction with KeyError
        uploader = data_dict.get('user_name')
        if uploader is not None:
            # strip the trailing "さん" suffix from the displayed user name
            uploader = re.sub(r'\s*さん\s*$', '', uploader)

        # since videos do not have title, give the name of song instead
        title = data_dict.get('song_title')

        stream_tree = self._download_xml(
            self._DKML_XML_URL % video_id, video_id, note='Requesting stream information', encoding='sjis',
            # doing this has no problem since there is no character outside ASCII,
            # and never likely to happen in the future
            transform_source=lambda x: re.sub(r'\s*encoding="[^"]+?"', '', x))
        m3u8_url = try_get(stream_tree, lambda x: x.find(
            './/d:streamingUrl', {'d': self._DKML_XML_NS}).text.strip(), compat_str)
        if not m3u8_url:
            raise ExtractorError('Failed to obtain m3u8 URL')
        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')

        # `or ''` keeps the regex search well-defined when the element is absent
        upload_date = unified_strdate(self._search_regex(
            r'(\d{4}/\d{2}/\d{2})', data_dict.get('date') or '', 'upload_date', default=None))
        view_count = int_or_none(self._search_regex(
            r'(\d+)', data_dict.get('audience') or '', 'view_count', default=None))
        like_count = int_or_none(self._search_regex(
            r'(\d+)', data_dict.get('nice') or '', 'like_count', default=None))

        return {
            'id': video_id,
            'title': title,
            'uploader_id': uploader_id,
            'description': description,
            'uploader': uploader,
            'upload_date': upload_date,
            'view_count': view_count,
            'like_count': like_count,
            'track': title,
            'artist': data_dict.get('song_artist'),
            'formats': formats,
        }
53
54
class DamtomoVideoIE(DamtomoBaseIE):
    # Extractor for "karaokeMovie" pages (StreamingDkm.do).
    IE_NAME = 'damtomo:video'

    # Accepts URLs with or without the "www." host prefix and the "SP/" path segment
    _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokeMovie/StreamingDkm\.do\?karaokeMovieId=(?P<id>\d+)'

    # Endpoints consumed by the base class; each takes the numeric ID via %s
    _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=%s'
    _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML.do?movieSelectFlg=2&karaokeMovieId=%s'
    # XML namespace the base class binds to the "d" prefix when looking up streamingUrl
    _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokeMovie/GetStreamingDkmUrlXML'

    _TESTS = [
        {
            'url': 'https://www.clubdam.com/app/damtomo/karaokeMovie/StreamingDkm.do?karaokeMovieId=2414316',
            'info_dict': {
                'id': '2414316',
                'title': 'Get Wild',
                'uploader': 'Kドロン',
                'uploader_id': 'ODk5NTQwMzQ',
                'track': 'Get Wild',
                'artist': 'TM NETWORK(TMN)',
                'upload_date': '20201226',
            },
        },
    ]
73
74
class DamtomoRecordIE(DamtomoBaseIE):
    # Extractor for "karaokePost" pages (StreamingKrk.do).
    IE_NAME = 'damtomo:record'

    # Accepts URLs with or without the "www." host prefix and the "SP/" path segment
    _VALID_URL = r'https?://(?:www\.)?clubdam\.com/app/damtomo/(?:SP/)?karaokePost/StreamingKrk\.do\?karaokeContributeId=(?P<id>\d+)'

    # Endpoints consumed by the base class; each takes the numeric ID via %s
    _WEBPAGE_URL_TMPL = 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=%s'
    _DKML_XML_URL = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML.do?karaokeContributeId=%s'
    # XML namespace the base class binds to the "d" prefix when looking up streamingUrl
    _DKML_XML_NS = 'https://www.clubdam.com/app/damtomo/karaokePost/GetStreamingKrkUrlXML'

    _TESTS = [
        {
            # item without a description
            'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27376862',
            'info_dict': {
                'id': '27376862',
                'title': 'イカSUMMER [良音]',
                'uploader': 'NANA',
                'uploader_id': 'MzAyMDExNTY',
                'upload_date': '20210721',
                'view_count': 4,
                'like_count': 1,
                'track': 'イカSUMMER [良音]',
                'artist': 'ORANGE RANGE',
            },
        },
        {
            # item with a description
            'url': 'https://www.clubdam.com/app/damtomo/karaokePost/StreamingKrk.do?karaokeContributeId=27489418',
            'info_dict': {
                'id': '27489418',
                'title': '心みだれて〜say it with flowers〜(生音)',
                'uploader_id': 'NjI1MjI2MjU',
                'description': 'やっぱりキーを下げて正解だった感じ。リベンジ成功ということで。',
                'uploader': '箱の「中の人」',
                'upload_date': '20210815',
                'view_count': 5,
                'like_count': 3,
                'track': '心みだれて〜say it with flowers〜(生音)',
                'artist': '小林明子',
            },
        },
    ]