yt_dlp/extractor/skeb.py

   1 from .common import InfoExtractor
   2 from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
   3
   4
   5 class SkebIE(InfoExtractor):
   6     _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)'
   7
   8     _TESTS = [{
   9         'url': 'https://skeb.jp/@riiru_wm/works/10',
  10         'info_dict': {
  11             'id': '466853',
  12             'title': '内容はおまかせします！ by 姫ノ森りぃる@一周年',
  13             'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
  14             'uploader': '姫ノ森りぃる@一周年',
  15             'uploader_id': 'riiru_wm',
  16             'age_limit': 0,
  17             'tags': [],
  18             'url': r're:https://skeb.+',
  19             'thumbnail': r're:https://skeb.+',
  20             'subtitles': {
  21                 'jpn': [{
  22                     'url': r're:https://skeb.+',
  23                     'ext': 'vtt'
  24                 }]
  25             },
  26             'width': 720,
  27             'height': 405,
  28             'duration': 313,
  29             'fps': 30,
  30             'ext': 'mp4',
  31         },
  32     }, {
  33         'url': 'https://skeb.jp/@furukawa_nob/works/3',
  34         'info_dict': {
  35             'id': '489408',
  36             'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
  37             'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
  38             'uploader': '古川ノブ@音楽とVlogのVtuber',
  39             'uploader_id': 'furukawa_nob',
  40             'age_limit': 0,
  41             'tags': [
  42                 'よろしく', '大丈夫', 'お願い', 'でした',
  43                 '是非', 'O', 'バー', '遊び', 'おはよう',
  44                 'オーバ', 'ボイス',
  45             ],
  46             'url': r're:https://skeb.+',
  47             'thumbnail': r're:https://skeb.+',
  48             'subtitles': {
  49                 'jpn': [{
  50                     'url': r're:https://skeb.+',
  51                     'ext': 'vtt'
  52                 }]
  53             },
  54             'duration': 98,
  55             'ext': 'mp3',
  56             'vcodec': 'none',
  57             'abr': 128,
  58         },
  59     }, {
  60         'url': 'https://skeb.jp/@mollowmollow/works/6',
  61         'info_dict': {
  62             'id': '6',
  63             'title': 'ヒロ。\n\n私のキャラク... by 諸々',
  64             'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
  65             '_type': 'playlist',
  66             'entries': [{
  67                 'id': '486430',
  68                 'title': 'ヒロ。\n\n私のキャラク... by 諸々',
  69                 'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
  70             }, {
  71                 'id': '486431',
  72                 'title': 'ヒロ。\n\n私のキャラク... by 諸々',
  73             }]
  74         }
  75     }]
  76
  77     def _real_extract(self, url):
  78         video_id = self._match_id(url)
  79         nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id)
  80
  81         parent = {
  82             'id': video_id,
  83             'title': nuxt_data.get('title'),
  84             'description': nuxt_data.get('description'),
  85             'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
  86             'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
  87             'age_limit': 18 if nuxt_data.get('nsfw') else 0,
  88             'tags': nuxt_data.get('tag_list'),
  89         }
  90
  91         entries = []
  92         for item in nuxt_data.get('previews') or []:
  93             vid_url = item.get('url')
  94             given_ext = traverse_obj(item, ('information', 'extension'))
  95             preview_ext = determine_ext(vid_url, default_ext=None)
  96             if not preview_ext:
  97                 content_disposition = parse_qs(vid_url)['response-content-disposition'][0]
  98                 preview_ext = self._search_regex(
  99                     r'filename="[^"]+\.([^\.]+?)"', content_disposition,
 100                     'preview file extension', fatal=False, group=1)
 101             if preview_ext not in ('mp4', 'mp3'):
 102                 continue
 103             if not vid_url or not item.get('id'):
 104                 continue
 105             width, height = traverse_obj(item, ('information', 'width')), traverse_obj(item, ('information', 'height'))
 106             if width is not None and height is not None:
 107                 # the longest side is at most 720px for non-client viewers
 108                 max_size = max(width, height)
 109                 width, height = list(x * 720 // max_size for x in (width, height))
 110             entries.append({
 111                 **parent,
 112                 'id': str(item['id']),
 113                 'url': vid_url,
 114                 'thumbnail': item.get('poster_url'),
 115                 'subtitles': {
 116                     'jpn': [{
 117                         'url': item.get('vtt_url'),
 118                         'ext': 'vtt',
 119                     }]
 120                 } if item.get('vtt_url') else None,
 121                 'width': width,
 122                 'height': height,
 123                 'duration': traverse_obj(item, ('information', 'duration')),
 124                 'fps': traverse_obj(item, ('information', 'frame_rate')),
 125                 'ext': preview_ext or given_ext,
 126                 'vcodec': 'none' if preview_ext == 'mp3' else None,
 127                 # you'll always get 128kbps MP3 for non-client viewers
 128                 'abr': 128 if preview_ext == 'mp3' else None,
 129             })
 130
 131         if not entries:
 132             raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
 133         elif len(entries) == 1:
 134             return entries[0]
 135         else:
 136             parent.update({
 137                 '_type': 'playlist',
 138                 'entries': entries,
 139             })
 140             return parent