[yt-dlp.git] / yt_dlp / extractor / skeb.py

# coding: utf-8
from __future__ import unicode_literals

from .common import InfoExtractor
from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj


class SkebIE(InfoExtractor):
    """Extractor for video/audio previews attached to skeb.jp commission pages.

    A commission ("work") page may carry several previews; only those whose
    file extension is ``mp4`` or ``mp3`` are extracted. One playable preview
    yields a single info dict, several yield a playlist.
    """

    _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://skeb.jp/@riiru_wm/works/10',
        'info_dict': {
            'id': '466853',
            'title': '内容はおまかせします！ by 姫ノ森りぃる@一周年',
            'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
            'uploader': '姫ノ森りぃる@一周年',
            'uploader_id': 'riiru_wm',
            'age_limit': 0,
            'tags': [],
            'url': r're:https://skeb.+',
            'thumbnail': r're:https://skeb.+',
            'subtitles': {
                'jpn': [{
                    'url': r're:https://skeb.+',
                    'ext': 'vtt',
                }],
            },
            'width': 720,
            'height': 405,
            'duration': 313,
            'fps': 30,
            'ext': 'mp4',
        },
    }, {
        'url': 'https://skeb.jp/@furukawa_nob/works/3',
        'info_dict': {
            'id': '489408',
            'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
            'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
            'uploader': '古川ノブ@音楽とVlogのVtuber',
            'uploader_id': 'furukawa_nob',
            'age_limit': 0,
            'tags': [
                'よろしく', '大丈夫', 'お願い', 'でした',
                '是非', 'O', 'バー', '遊び', 'おはよう',
                'オーバ', 'ボイス',
            ],
            'url': r're:https://skeb.+',
            'thumbnail': r're:https://skeb.+',
            'subtitles': {
                'jpn': [{
                    'url': r're:https://skeb.+',
                    'ext': 'vtt',
                }],
            },
            'duration': 98,
            'ext': 'mp3',
            'vcodec': 'none',
            'abr': 128,
        },
    }, {
        'url': 'https://skeb.jp/@mollowmollow/works/6',
        'info_dict': {
            'id': '6',
            'title': 'ヒロ。\n\n私のキャラク... by 諸々',
            'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
            '_type': 'playlist',
            'entries': [{
                'id': '486430',
                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
                'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
            }, {
                'id': '486431',
                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
            }],
        },
    }]

    def _real_extract(self, url):
        """Build the info dict (or playlist) for the commission at *url*.

        Returns the single entry when exactly one mp4/mp3 preview exists, or a
        playlist dict carrying the commission metadata plus all entries when
        there are several.

        Raises ExtractorError (expected) when no mp4/mp3 preview is found.
        """
        video_id = self._match_id(url)
        nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id)

        # Commission-level metadata, shared by every entry and by the playlist.
        parent = {
            'id': video_id,
            'title': nuxt_data.get('title'),
            'description': nuxt_data.get('description'),
            'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
            'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
            'age_limit': 18 if nuxt_data.get('nsfw') else 0,
            'tags': nuxt_data.get('tag_list'),
        }

        entries = []
        for item in nuxt_data.get('previews') or []:
            vid_url = item.get('url')
            # Validate before touching the URL: a preview without URL or id
            # cannot be turned into an entry and must not reach the parsers.
            if not vid_url or not item.get('id'):
                continue

            preview_ext = determine_ext(vid_url, default_ext=None)
            if not preview_ext:
                # Fall back to the file name in the `response-content-disposition`
                # query parameter; it may be absent, so never index blindly.
                content_disposition = (
                    parse_qs(vid_url).get('response-content-disposition') or [None])[0]
                if content_disposition:
                    preview_ext = self._search_regex(
                        r'filename="[^"]+\.([^\.]+?)"', content_disposition,
                        'preview file extension', fatal=False, group=1)
            if preview_ext not in ('mp4', 'mp3'):
                continue

            width = traverse_obj(item, ('information', 'width'))
            height = traverse_obj(item, ('information', 'height'))
            if width is not None and height is not None:
                # the longest side is at most 720px for non-client viewers
                max_size = max(width, height)
                if max_size:  # avoid dividing by zero on degenerate dimensions
                    width, height = (x * 720 // max_size for x in (width, height))

            is_audio = preview_ext == 'mp3'
            vtt_url = item.get('vtt_url')
            entries.append({
                **parent,
                'id': str(item['id']),
                'url': vid_url,
                'thumbnail': item.get('poster_url'),
                'subtitles': {
                    'jpn': [{
                        'url': vtt_url,
                        'ext': 'vtt',
                    }],
                } if vtt_url else None,
                'width': width,
                'height': height,
                'duration': traverse_obj(item, ('information', 'duration')),
                'fps': traverse_obj(item, ('information', 'frame_rate')),
                # preview_ext is guaranteed to be 'mp4' or 'mp3' at this point
                'ext': preview_ext,
                'vcodec': 'none' if is_audio else None,
                # you'll always get 128kbps MP3 for non-client viewers
                'abr': 128 if is_audio else None,
            })

        if not entries:
            raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
        if len(entries) == 1:
            return entries[0]

        parent.update({
            '_type': 'playlist',
            'entries': entries,
        })
        return parent
Commit	Line	Data
2814f12b THD	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	from .common import InfoExtractor
	5	from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
	6
	7
	8	class SkebIE(InfoExtractor):
	9	_VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)'
	10
	11	_TESTS = [{
	12	'url': 'https://skeb.jp/@riiru_wm/works/10',
	13	'info_dict': {
	14	'id': '466853',
	15	'title': '内容はおまかせします！ by 姫ノ森りぃる@一周年',
	16	'descripion': 'md5:1ec50901efc3437cfbfe3790468d532d',
	17	'uploader': '姫ノ森りぃる@一周年',
	18	'uploader_id': 'riiru_wm',
	19	'age_limit': 0,
	20	'tags': [],
	21	'url': r're:https://skeb.+',
	22	'thumbnail': r're:https://skeb.+',
	23	'subtitles': {
	24	'jpn': [{
	25	'url': r're:https://skeb.+',
	26	'ext': 'vtt'
	27	}]
	28	},
	29	'width': 720,
	30	'height': 405,
	31	'duration': 313,
	32	'fps': 30,
	33	'ext': 'mp4',
	34	},
	35	}, {
	36	'url': 'https://skeb.jp/@furukawa_nob/works/3',
	37	'info_dict': {
	38	'id': '489408',
	39	'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
	40	'descripion': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
	41	'uploader': '古川ノブ@音楽とVlogのVtuber',
	42	'uploader_id': 'furukawa_nob',
	43	'age_limit': 0,
	44	'tags': [
	45	'よろしく', '大丈夫', 'お願い', 'でした',
	46	'是非', 'O', 'バー', '遊び', 'おはよう',
	47	'オーバ', 'ボイス',
	48	],
	49	'url': r're:https://skeb.+',
	50	'thumbnail': r're:https://skeb.+',
	51	'subtitles': {
	52	'jpn': [{
	53	'url': r're:https://skeb.+',
	54	'ext': 'vtt'
	55	}]
	56	},
	57	'duration': 98,
	58	'ext': 'mp3',
	59	'vcodec': 'none',
	60	'abr': 128,
	61	},
	62	}, {
	63	'url': 'https://skeb.jp/@mollowmollow/works/6',
	64	'info_dict': {
65	'id': '6',
66	'title': 'ヒロ。\n\n私のキャラク... by 諸々',
67	'descripion': 'md5:aa6cbf2ba320b50bce219632de195f07',
68	'_type': 'playlist',
69	'entries': [{
70	'id': '486430',
71	'title': 'ヒロ。\n\n私のキャラク... by 諸々',
72	'descripion': 'md5:aa6cbf2ba320b50bce219632de195f07',
73	}, {
74	'id': '486431',
75	'title': 'ヒロ。\n\n私のキャラク... by 諸々',
76	}]
77	}
78	}]
79
80	def _real_extract(self, url):
81	video_id = self._match_id(url)
82	nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id)
83
84	parent = {
85	'id': video_id,
86	'title': nuxt_data.get('title'),
87	'descripion': nuxt_data.get('description'),
88	'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
89	'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
90	'age_limit': 18 if nuxt_data.get('nsfw') else 0,
91	'tags': nuxt_data.get('tag_list'),
92	}
93
94	entries = []
95	for item in nuxt_data.get('previews') or []:
96	vid_url = item.get('url')
97	given_ext = traverse_obj(item, ('information', 'extension'))
98	preview_ext = determine_ext(vid_url, default_ext=None)
99	if not preview_ext:
100	content_disposition = parse_qs(vid_url)['response-content-disposition'][0]
101	preview_ext = self._search_regex(
102	r'filename="[^"]+\.([^\.]+?)"', content_disposition,
103	'preview file extension', fatal=False, group=1)
104	if preview_ext not in ('mp4', 'mp3'):
105	continue
106	if not vid_url or not item.get('id'):
107	continue
108	width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height'))
109	if width is not None and height is not None:
110	# the longest side is at most 720px for non-client viewers
111	max_size = max(width, height)
112	width, height = list(x * 720 // max_size for x in (width, height))
113	entries.append({
114	**parent,
115	'id': str(item['id']),
116	'url': vid_url,
117	'thumbnail': item.get('poster_url'),
118	'subtitles': {
119	'jpn': [{
120	'url': item.get('vtt_url'),
121	'ext': 'vtt',
122	}]
123	} if item.get('vtt_url') else None,
124	'width': width,
125	'height': height,
126	'duration': traverse_obj(item, ('information', 'duration')),
127	'fps': traverse_obj(item, ('information', 'frame_rate')),
128	'ext': preview_ext or given_ext,
129	'vcodec': 'none' if preview_ext == 'mp3' else None,
130	# you'll always get 128kbps MP3 for non-client viewers
131	'abr': 128 if preview_ext == 'mp3' else None,
132	})
133
134	if not entries:
135	raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
136	elif len(entries) == 1:
137	return entries[0]
138	else:
139	parent.update({
140	'_type': 'playlist',
141	'entries': entries,
142	})
143	return parent