yt_dlp/extractor/vvvvid.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from .youtube import YoutubeIE
   8 from ..utils import (
   9     ExtractorError,
  10     int_or_none,
  11     str_or_none,
  12 )
  13
  14
  15 class VVVVIDIE(InfoExtractor):
  16     _VALID_URL_BASE = r'https?://(?:www\.)?vvvvid\.it/(?:#!)?(?:show|anime|film|series)/'
  17     _VALID_URL = r'%s(?P<show_id>\d+)/[^/]+/(?P<season_id>\d+)/(?P<id>[0-9]+)' % _VALID_URL_BASE
  18     _TESTS = [{
  19         # video_type == 'video/vvvvid'
  20         'url': 'https://www.vvvvid.it/#!show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048/ping-pong',
  21         'md5': 'b8d3cecc2e981adc3835adf07f6df91b',
  22         'info_dict': {
  23             'id': '489048',
  24             'ext': 'mp4',
  25             'title': 'Ping Pong',
  26             'duration': 239,
  27             'series': '"Perché dovrei guardarlo?" di Dario Moccia',
  28             'season_id': '437',
  29             'episode': 'Ping Pong',
  30             'episode_number': 1,
  31             'episode_id': '3334',
  32             'view_count': int,
  33             'like_count': int,
  34             'repost_count': int,
  35         },
  36         'params': {
  37             'skip_download': True,
  38         },
  39     }, {
  40         # video_type == 'video/rcs'
  41         'url': 'https://www.vvvvid.it/#!show/376/death-note-live-action/377/482493/episodio-01',
  42         'md5': '33e0edfba720ad73a8782157fdebc648',
  43         'info_dict': {
  44             'id': '482493',
  45             'ext': 'mp4',
  46             'title': 'Episodio 01',
  47         },
  48         'params': {
  49             'skip_download': True,
  50         },
  51     }, {
  52         # video_type == 'video/youtube'
  53         'url': 'https://www.vvvvid.it/show/404/one-punch-man/406/486683/trailer',
  54         'md5': '33e0edfba720ad73a8782157fdebc648',
  55         'info_dict': {
  56             'id': 'RzmFKUDOUgw',
  57             'ext': 'mp4',
  58             'title': 'Trailer',
  59             'upload_date': '20150906',
  60             'description': 'md5:a5e802558d35247fee285875328c0b80',
  61             'uploader_id': 'BandaiVisual',
  62             'uploader': 'BANDAI NAMCO Arts Channel',
  63         },
  64         'params': {
  65             'skip_download': True,
  66         },
  67     }, {
  68         'url': 'https://www.vvvvid.it/show/434/perche-dovrei-guardarlo-di-dario-moccia/437/489048',
  69         'only_matching': True
  70     }]
  71     _conn_id = None
  72
  73     def _real_initialize(self):
  74         self._conn_id = self._download_json(
  75             'https://www.vvvvid.it/user/login',
  76             None, headers=self.geo_verification_headers())['data']['conn_id']
  77
  78     def _download_info(self, show_id, path, video_id, fatal=True):
  79         response = self._download_json(
  80             'https://www.vvvvid.it/vvvvid/ondemand/%s/%s' % (show_id, path),
  81             video_id, headers=self.geo_verification_headers(), query={
  82                 'conn_id': self._conn_id,
  83             }, fatal=fatal)
  84         if not (response or fatal):
  85             return
  86         if response.get('result') == 'error':
  87             raise ExtractorError('%s said: %s' % (
  88                 self.IE_NAME, response['message']), expected=True)
  89         return response['data']
  90
  91     def _extract_common_video_info(self, video_data):
  92         return {
  93             'thumbnail': video_data.get('thumbnail'),
  94             'episode_id': str_or_none(video_data.get('id')),
  95         }
  96
  97     def _real_extract(self, url):
  98         show_id, season_id, video_id = re.match(self._VALID_URL, url).groups()
  99
 100         response = self._download_info(
 101             show_id, 'season/%s' % season_id, video_id)
 102
 103         vid = int(video_id)
 104         video_data = list(filter(
 105             lambda episode: episode.get('video_id') == vid, response))[0]
 106         title = video_data['title']
 107         formats = []
 108
 109         # vvvvid embed_info decryption algorithm is reverse engineered from function $ds(h) at vvvvid.js
 110         def ds(h):
 111             g = "MNOPIJKL89+/4567UVWXQRSTEFGHABCDcdefYZabstuvopqr0123wxyzklmnghij"
 112
 113             def f(m):
 114                 l = []
 115                 o = 0
 116                 b = False
 117                 m_len = len(m)
 118                 while ((not b) and o < m_len):
 119                     n = m[o] << 2
 120                     o += 1
 121                     k = -1
 122                     j = -1
 123                     if o < m_len:
 124                         n += m[o] >> 4
 125                         o += 1
 126                         if o < m_len:
 127                             k = (m[o - 1] << 4) & 255
 128                             k += m[o] >> 2
 129                             o += 1
 130                             if o < m_len:
 131                                 j = (m[o - 1] << 6) & 255
 132                                 j += m[o]
 133                                 o += 1
 134                             else:
 135                                 b = True
 136                         else:
 137                             b = True
 138                     else:
 139                         b = True
 140                     l.append(n)
 141                     if k != -1:
 142                         l.append(k)
 143                     if j != -1:
 144                         l.append(j)
 145                 return l
 146
 147             c = []
 148             for e in h:
 149                 c.append(g.index(e))
 150
 151             c_len = len(c)
 152             for e in range(c_len * 2 - 1, -1, -1):
 153                 a = c[e % c_len] ^ c[(e + 1) % c_len]
 154                 c[e % c_len] = a
 155
 156             c = f(c)
 157             d = ''
 158             for e in c:
 159                 d += chr(e)
 160
 161             return d
 162
 163         info = {}
 164
 165         def metadata_from_url(r_url):
 166             if not info and r_url:
 167                 mobj = re.search(r'_(?:S(\d+))?Ep(\d+)', r_url)
 168                 if mobj:
 169                     info['episode_number'] = int(mobj.group(2))
 170                     season_number = mobj.group(1)
 171                     if season_number:
 172                         info['season_number'] = int(season_number)
 173
 174         video_type = video_data.get('video_type')
 175         is_youtube = False
 176         for quality in ('', '_sd'):
 177             embed_code = video_data.get('embed_info' + quality)
 178             if not embed_code:
 179                 continue
 180             embed_code = ds(embed_code)
 181             if video_type in ('video/rcs', 'video/kenc'):
 182                 if video_type == 'video/kenc':
 183                     kenc = self._download_json(
 184                         'https://www.vvvvid.it/kenc', video_id, query={
 185                             'action': 'kt',
 186                             'conn_id': self._conn_id,
 187                             'url': embed_code,
 188                         }, fatal=False) or {}
 189                     kenc_message = kenc.get('message')
 190                     if kenc_message:
 191                         embed_code += '?' + ds(kenc_message)
 192                 formats.extend(self._extract_akamai_formats(embed_code, video_id))
 193             elif video_type == 'video/youtube':
 194                 info.update({
 195                     '_type': 'url_transparent',
 196                     'ie_key': YoutubeIE.ie_key(),
 197                     'url': embed_code,
 198                 })
 199                 is_youtube = True
 200                 break
 201             else:
 202                 formats.extend(self._extract_wowza_formats(
 203                     'http://sb.top-ix.org/videomg/_definst_/mp4:%s/playlist.m3u8' % embed_code, video_id))
 204             metadata_from_url(embed_code)
 205
 206         if not is_youtube:
 207             self._sort_formats(formats)
 208             info['formats'] = formats
 209
 210         metadata_from_url(video_data.get('thumbnail'))
 211         info.update(self._extract_common_video_info(video_data))
 212         info.update({
 213             'id': video_id,
 214             'title': title,
 215             'duration': int_or_none(video_data.get('length')),
 216             'series': video_data.get('show_title'),
 217             'season_id': season_id,
 218             'episode': title,
 219             'view_count': int_or_none(video_data.get('views')),
 220             'like_count': int_or_none(video_data.get('video_likes')),
 221             'repost_count': int_or_none(video_data.get('video_shares')),
 222         })
 223         return info
 224
 225
 226 class VVVVIDShowIE(VVVVIDIE):
 227     _VALID_URL = r'(?P<base_url>%s(?P<id>\d+)(?:/(?P<show_title>[^/?&#]+))?)/?(?:[?#&]|$)' % VVVVIDIE._VALID_URL_BASE
 228     _TESTS = [{
 229         'url': 'https://www.vvvvid.it/show/156/psyco-pass',
 230         'info_dict': {
 231             'id': '156',
 232             'title': 'Psycho-Pass',
 233             'description': 'md5:94d572c0bd85894b193b8aebc9a3a806',
 234         },
 235         'playlist_count': 46,
 236     }, {
 237         'url': 'https://www.vvvvid.it/show/156',
 238         'only_matching': True,
 239     }]
 240
 241     def _real_extract(self, url):
 242         base_url, show_id, show_title = re.match(self._VALID_URL, url).groups()
 243
 244         seasons = self._download_info(
 245             show_id, 'seasons/', show_title)
 246
 247         show_info = self._download_info(
 248             show_id, 'info/', show_title, fatal=False)
 249
 250         entries = []
 251         for season in (seasons or []):
 252             episodes = season.get('episodes') or []
 253             for episode in episodes:
 254                 if episode.get('playable') is False:
 255                     continue
 256                 season_id = str_or_none(episode.get('season_id'))
 257                 video_id = str_or_none(episode.get('video_id'))
 258                 if not (season_id and video_id):
 259                     continue
 260                 info = self._extract_common_video_info(episode)
 261                 info.update({
 262                     '_type': 'url',
 263                     'ie_key': VVVVIDIE.ie_key(),
 264                     'url': '/'.join([base_url, season_id, video_id]),
 265                     'title': episode.get('title'),
 266                     'description': episode.get('description'),
 267                     'season_id': season_id,
 268                 })
 269                 entries.append(info)
 270
 271         return self.playlist_result(
 272             entries, show_id, show_info.get('title'), show_info.get('description'))