]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/skeb.py
[cleanup, docs] Misc cleanup
[yt-dlp.git] / yt_dlp / extractor / skeb.py
CommitLineData
2814f12b
THD
1# coding: utf-8
2from __future__ import unicode_literals
3
4from .common import InfoExtractor
5from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
6
7
class SkebIE(InfoExtractor):
    """Extract the video/audio previews attached to a skeb.jp work page."""

    _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://skeb.jp/@riiru_wm/works/10',
        'info_dict': {
            'id': '466853',
            'title': '内容はおまかせします! by 姫ノ森りぃる@一周年',
            'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
            'uploader': '姫ノ森りぃる@一周年',
            'uploader_id': 'riiru_wm',
            'age_limit': 0,
            'tags': [],
            'url': r're:https://skeb.+',
            'thumbnail': r're:https://skeb.+',
            'subtitles': {
                'jpn': [{
                    'url': r're:https://skeb.+',
                    'ext': 'vtt'
                }]
            },
            'width': 720,
            'height': 405,
            'duration': 313,
            'fps': 30,
            'ext': 'mp4',
        },
    }, {
        'url': 'https://skeb.jp/@furukawa_nob/works/3',
        'info_dict': {
            'id': '489408',
            'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
            'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
            'uploader': '古川ノブ@音楽とVlogのVtuber',
            'uploader_id': 'furukawa_nob',
            'age_limit': 0,
            'tags': [
                'よろしく', '大丈夫', 'お願い', 'でした',
                '是非', 'O', 'バー', '遊び', 'おはよう',
                'オーバ', 'ボイス',
            ],
            'url': r're:https://skeb.+',
            'thumbnail': r're:https://skeb.+',
            'subtitles': {
                'jpn': [{
                    'url': r're:https://skeb.+',
                    'ext': 'vtt'
                }]
            },
            'duration': 98,
            'ext': 'mp3',
            'vcodec': 'none',
            'abr': 128,
        },
    }, {
        'url': 'https://skeb.jp/@mollowmollow/works/6',
        'info_dict': {
            'id': '6',
            'title': 'ヒロ。\n\n私のキャラク... by 諸々',
            'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
            '_type': 'playlist',
            'entries': [{
                'id': '486430',
                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
                'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
            }, {
                'id': '486431',
                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
            }]
        }
    }]

    def _preview_extension(self, vid_url):
        """Return the file extension of a preview URL, or None if it cannot be determined.

        The extension is taken from the URL itself when possible; otherwise it
        is read from the filename embedded in the URL's
        ``response-content-disposition`` query parameter.
        """
        preview_ext = determine_ext(vid_url, default_ext=None)
        if preview_ext:
            return preview_ext
        # A missing query parameter means "unknown extension", not an error
        content_disposition = (parse_qs(vid_url).get('response-content-disposition') or [None])[0]
        if not content_disposition:
            return None
        return self._search_regex(
            r'filename="[^"]+\.([^\.]+?)"', content_disposition,
            'preview file extension', fatal=False, group=1)

    @staticmethod
    def _scale_to_preview(width, height):
        """Rescale (width, height) so that the longest side becomes 720.

        The pair is returned unchanged when either value is missing or when
        the longest side is 0 (which would otherwise divide by zero).
        """
        if width is None or height is None:
            return width, height
        max_size = max(width, height)
        if not max_size:
            return width, height
        return width * 720 // max_size, height * 720 // max_size

    def _real_extract(self, url):
        """Build the info dict for a work page.

        Returns a single entry when exactly one mp4/mp3 preview is attached,
        or a playlist of entries when there are several.

        Raises ExtractorError (expected) when no mp4/mp3 preview is found.
        """
        video_id = self._match_id(url)
        nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id)

        # Metadata shared by every preview of this work
        parent = {
            'id': video_id,
            'title': nuxt_data.get('title'),
            'description': nuxt_data.get('description'),
            'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
            'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
            'age_limit': 18 if nuxt_data.get('nsfw') else 0,
            'tags': nuxt_data.get('tag_list'),
        }

        entries = []
        for item in nuxt_data.get('previews') or []:
            vid_url = item.get('url')
            # Checked first: the extension lookup below needs an actual URL
            if not vid_url or not item.get('id'):
                continue
            preview_ext = self._preview_extension(vid_url)
            if preview_ext not in ('mp4', 'mp3'):
                continue
            # the longest side is at most 720px for non-client viewers
            width, height = self._scale_to_preview(
                traverse_obj(item, ('information', 'width')),
                traverse_obj(item, ('information', 'height')))
            is_audio = preview_ext == 'mp3'
            entries.append({
                **parent,
                'id': str(item['id']),
                'url': vid_url,
                'thumbnail': item.get('poster_url'),
                'subtitles': {
                    'jpn': [{
                        'url': item.get('vtt_url'),
                        'ext': 'vtt',
                    }]
                } if item.get('vtt_url') else None,
                'width': width,
                'height': height,
                'duration': traverse_obj(item, ('information', 'duration')),
                'fps': traverse_obj(item, ('information', 'frame_rate')),
                'ext': preview_ext,
                'vcodec': 'none' if is_audio else None,
                # you'll always get 128kbps MP3 for non-client viewers
                'abr': 128 if is_audio else None,
            })

        if not entries:
            raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
        if len(entries) == 1:
            return entries[0]
        parent.update({
            '_type': 'playlist',
            'entries': entries,
        })
        return parent