yt_dlp/extractor/viu.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import json
   6 import uuid
   7 import random
   8 import urllib.parse
   9
  10 from .common import InfoExtractor
  11 from ..compat import compat_str
  12 from ..utils import (
  13     ExtractorError,
  14     int_or_none,
  15     strip_or_none,
  16     try_get,
  17     smuggle_url,
  18     unsmuggle_url,
  19     url_or_none,
  20 )
  21
  22
  23 class ViuBaseIE(InfoExtractor):
  24     def _call_api(self, path, *args, headers={}, **kwargs):
  25         response = self._download_json(
  26             f'https://www.viu.com/api/{path}', *args, **kwargs,
  27             headers={**self.geo_verification_headers(), **headers})['response']
  28         if response.get('status') != 'success':
  29             raise ExtractorError(f'{self.IE_NAME} said: {response["message"]}', expected=True)
  30         return response
  31
  32
  33 class ViuIE(ViuBaseIE):
  34     _VALID_URL = r'(?:viu:|https?://[^/]+\.viu\.com/[a-z]{2}/media/)(?P<id>\d+)'
  35     _TESTS = [{
  36         'url': 'https://www.viu.com/en/media/1116705532?containerId=playlist-22168059',
  37         'info_dict': {
  38             'id': '1116705532',
  39             'ext': 'mp4',
  40             'title': 'Citizen Khan - Ep 1',
  41             'description': 'md5:d7ea1604f49e5ba79c212c551ce2110e',
  42         },
  43         'params': {
  44             'skip_download': 'm3u8 download',
  45         },
  46         'skip': 'Geo-restricted to India',
  47     }, {
  48         'url': 'https://www.viu.com/en/media/1130599965',
  49         'info_dict': {
  50             'id': '1130599965',
  51             'ext': 'mp4',
  52             'title': 'Jealousy Incarnate - Episode 1',
  53             'description': 'md5:d3d82375cab969415d2720b6894361e9',
  54         },
  55         'params': {
  56             'skip_download': 'm3u8 download',
  57         },
  58         'skip': 'Geo-restricted to Indonesia',
  59     }, {
  60         'url': 'https://india.viu.com/en/media/1126286865',
  61         'only_matching': True,
  62     }]
  63
  64     def _real_extract(self, url):
  65         video_id = self._match_id(url)
  66
  67         video_data = self._call_api(
  68             'clip/load', video_id, 'Downloading video data', query={
  69                 'appid': 'viu_desktop',
  70                 'fmt': 'json',
  71                 'id': video_id
  72             })['item'][0]
  73
  74         title = video_data['title']
  75
  76         m3u8_url = None
  77         url_path = video_data.get('urlpathd') or video_data.get('urlpath')
  78         tdirforwhole = video_data.get('tdirforwhole')
  79         # #EXT-X-BYTERANGE is not supported by native hls downloader
  80         # and ffmpeg (#10955)
  81         # FIXME: It is supported in yt-dlp
  82         # hls_file = video_data.get('hlsfile')
  83         hls_file = video_data.get('jwhlsfile')
  84         if url_path and tdirforwhole and hls_file:
  85             m3u8_url = '%s/%s/%s' % (url_path, tdirforwhole, hls_file)
  86         else:
  87             # m3u8_url = re.sub(
  88             #     r'(/hlsc_)[a-z]+(\d+\.m3u8)',
  89             #     r'\1whe\2', video_data['href'])
  90             m3u8_url = video_data['href']
  91         formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4')
  92         self._sort_formats(formats)
  93
  94         for key, value in video_data.items():
  95             mobj = re.match(r'^subtitle_(?P<lang>[^_]+)_(?P<ext>(vtt|srt))', key)
  96             if not mobj:
  97                 continue
  98             subtitles.setdefault(mobj.group('lang'), []).append({
  99                 'url': value,
 100                 'ext': mobj.group('ext')
 101             })
 102
 103         return {
 104             'id': video_id,
 105             'title': title,
 106             'description': video_data.get('description'),
 107             'series': video_data.get('moviealbumshowname'),
 108             'episode': title,
 109             'episode_number': int_or_none(video_data.get('episodeno')),
 110             'duration': int_or_none(video_data.get('duration')),
 111             'formats': formats,
 112             'subtitles': subtitles,
 113         }
 114
 115
 116 class ViuPlaylistIE(ViuBaseIE):
 117     IE_NAME = 'viu:playlist'
 118     _VALID_URL = r'https?://www\.viu\.com/[^/]+/listing/playlist-(?P<id>\d+)'
 119     _TEST = {
 120         'url': 'https://www.viu.com/en/listing/playlist-22461380',
 121         'info_dict': {
 122             'id': '22461380',
 123             'title': 'The Good Wife',
 124         },
 125         'playlist_count': 16,
 126         'skip': 'Geo-restricted to Indonesia',
 127     }
 128
 129     def _real_extract(self, url):
 130         playlist_id = self._match_id(url)
 131         playlist_data = self._call_api(
 132             'container/load', playlist_id,
 133             'Downloading playlist info', query={
 134                 'appid': 'viu_desktop',
 135                 'fmt': 'json',
 136                 'id': 'playlist-' + playlist_id
 137             })['container']
 138
 139         entries = []
 140         for item in playlist_data.get('item', []):
 141             item_id = item.get('id')
 142             if not item_id:
 143                 continue
 144             item_id = compat_str(item_id)
 145             entries.append(self.url_result(
 146                 'viu:' + item_id, 'Viu', item_id))
 147
 148         return self.playlist_result(
 149             entries, playlist_id, playlist_data.get('title'))
 150
 151
 152 class ViuOTTIE(InfoExtractor):
 153     IE_NAME = 'viu:ott'
 154     _NETRC_MACHINE = 'viu'
 155     _VALID_URL = r'https?://(?:www\.)?viu\.com/ott/(?P<country_code>[a-z]{2})/(?P<lang_code>[a-z]{2}-[a-z]{2})/vod/(?P<id>\d+)'
 156     _TESTS = [{
 157         'url': 'http://www.viu.com/ott/sg/en-us/vod/3421/The%20Prime%20Minister%20and%20I',
 158         'info_dict': {
 159             'id': '3421',
 160             'ext': 'mp4',
 161             'title': 'A New Beginning',
 162             'description': 'md5:1e7486a619b6399b25ba6a41c0fe5b2c',
 163         },
 164         'params': {
 165             'skip_download': 'm3u8 download',
 166             'noplaylist': True,
 167         },
 168         'skip': 'Geo-restricted to Singapore',
 169     }, {
 170         'url': 'http://www.viu.com/ott/hk/zh-hk/vod/7123/%E5%A4%A7%E4%BA%BA%E5%A5%B3%E5%AD%90',
 171         'info_dict': {
 172             'id': '7123',
 173             'ext': 'mp4',
 174             'title': '這就是我的生活之道',
 175             'description': 'md5:4eb0d8b08cf04fcdc6bbbeb16043434f',
 176         },
 177         'params': {
 178             'skip_download': 'm3u8 download',
 179             'noplaylist': True,
 180         },
 181         'skip': 'Geo-restricted to Hong Kong',
 182     }, {
 183         'url': 'https://www.viu.com/ott/hk/zh-hk/vod/68776/%E6%99%82%E5%B0%9A%E5%AA%BD%E5%92%AA',
 184         'playlist_count': 12,
 185         'info_dict': {
 186             'id': '3916',
 187             'title': '時尚媽咪',
 188         },
 189         'params': {
 190             'skip_download': 'm3u8 download',
 191             'noplaylist': False,
 192         },
 193         'skip': 'Geo-restricted to Hong Kong',
 194     }]
 195
 196     _AREA_ID = {
 197         'HK': 1,
 198         'SG': 2,
 199         'TH': 4,
 200         'PH': 5,
 201     }
 202     _LANGUAGE_FLAG = {
 203         'zh-hk': 1,
 204         'zh-cn': 2,
 205         'en-us': 3,
 206     }
 207
 208     _user_token = None
 209     _auth_codes = {}
 210
 211     def _detect_error(self, response):
 212         code = try_get(response, lambda x: x['status']['code'])
 213         if code and code > 0:
 214             message = try_get(response, lambda x: x['status']['message'])
 215             raise ExtractorError(f'{self.IE_NAME} said: {message} ({code})', expected=True)
 216         return response.get('data') or {}
 217
 218     def _login(self, country_code, video_id):
 219         if self._user_token is None:
 220             username, password = self._get_login_info()
 221             if username is None:
 222                 return
 223             headers = {
 224                 'Authorization': f'Bearer {self._auth_codes[country_code]}',
 225                 'Content-Type': 'application/json'
 226             }
 227             data = self._download_json(
 228                 'https://api-gateway-global.viu.com/api/account/validate',
 229                 video_id, 'Validating email address', headers=headers,
 230                 data=json.dumps({
 231                     'principal': username,
 232                     'provider': 'email'
 233                 }).encode())
 234             if not data.get('exists'):
 235                 raise ExtractorError('Invalid email address')
 236
 237             data = self._download_json(
 238                 'https://api-gateway-global.viu.com/api/auth/login',
 239                 video_id, 'Logging in', headers=headers,
 240                 data=json.dumps({
 241                     'email': username,
 242                     'password': password,
 243                     'provider': 'email',
 244                 }).encode())
 245             self._detect_error(data)
 246             self._user_token = data.get('identity')
 247             # need to update with valid user's token else will throw an error again
 248             self._auth_codes[country_code] = data.get('token')
 249         return self._user_token
 250
 251     def _get_token(self, country_code, video_id):
 252         rand = ''.join(random.choice('0123456789') for _ in range(10))
 253         return self._download_json(
 254             f'https://api-gateway-global.viu.com/api/auth/token?v={rand}000', video_id,
 255             headers={'Content-Type': 'application/json'}, note='Getting bearer token',
 256             data=json.dumps({
 257                 'countryCode': country_code.upper(),
 258                 'platform': 'browser',
 259                 'platformFlagLabel': 'web',
 260                 'language': 'en',
 261                 'uuid': str(uuid.uuid4()),
 262                 'carrierId': '0'
 263             }).encode('utf-8'))['token']
 264
 265     def _real_extract(self, url):
 266         url, idata = unsmuggle_url(url, {})
 267         country_code, lang_code, video_id = self._match_valid_url(url).groups()
 268
 269         query = {
 270             'r': 'vod/ajax-detail',
 271             'platform_flag_label': 'web',
 272             'product_id': video_id,
 273         }
 274
 275         area_id = self._AREA_ID.get(country_code.upper())
 276         if area_id:
 277             query['area_id'] = area_id
 278
 279         product_data = self._download_json(
 280             f'http://www.viu.com/ott/{country_code}/index.php', video_id,
 281             'Downloading video info', query=query)['data']
 282
 283         video_data = product_data.get('current_product')
 284         if not video_data:
 285             self.raise_geo_restricted()
 286
 287         series_id = video_data.get('series_id')
 288         if self._yes_playlist(series_id, video_id, idata):
 289             series = product_data.get('series') or {}
 290             product = series.get('product')
 291             if product:
 292                 entries = []
 293                 for entry in sorted(product, key=lambda x: int_or_none(x.get('number', 0))):
 294                     item_id = entry.get('product_id')
 295                     if not item_id:
 296                         continue
 297                     entries.append(self.url_result(
 298                         smuggle_url(f'http://www.viu.com/ott/{country_code}/{lang_code}/vod/{item_id}/',
 299                                     {'force_noplaylist': True}),
 300                         ViuOTTIE, str(item_id), entry.get('synopsis', '').strip()))
 301
 302                 return self.playlist_result(entries, series_id, series.get('name'), series.get('description'))
 303
 304         duration_limit = False
 305         query = {
 306             'ccs_product_id': video_data['ccs_product_id'],
 307             'language_flag_id': self._LANGUAGE_FLAG.get(lang_code.lower()) or '3',
 308         }
 309
 310         def download_playback():
 311             stream_data = self._download_json(
 312                 'https://api-gateway-global.viu.com/api/playback/distribute',
 313                 video_id=video_id, query=query, fatal=False, note='Downloading stream info',
 314                 headers={
 315                     'Authorization': f'Bearer {self._auth_codes[country_code]}',
 316                     'Referer': url,
 317                     'Origin': url
 318                 })
 319             return self._detect_error(stream_data).get('stream')
 320
 321         if not self._auth_codes.get(country_code):
 322             self._auth_codes[country_code] = self._get_token(country_code, video_id)
 323
 324         stream_data = None
 325         try:
 326             stream_data = download_playback()
 327         except (ExtractorError, KeyError):
 328             token = self._login(country_code, video_id)
 329             if token is not None:
 330                 query['identity'] = token
 331             else:
 332                 # preview is limited to 3min for non-members. But we can try to bypass it
 333                 duration_limit, query['duration'] = True, '180'
 334             try:
 335                 stream_data = download_playback()
 336             except (ExtractorError, KeyError):
 337                 if token is not None:
 338                     raise
 339                 self.raise_login_required(method='password')
 340         if not stream_data:
 341             raise ExtractorError('Cannot get stream info', expected=True)
 342
 343         formats = []
 344         for vid_format, stream_url in (stream_data.get('url') or {}).items():
 345             height = int(self._search_regex(r's(\d+)p', vid_format, 'height', default=None))
 346
 347             # bypass preview duration limit
 348             if duration_limit:
 349                 stream_url = urllib.parse.urlparse(stream_url)
 350                 query.update({
 351                     'duration': video_data.get('time_duration') or '9999999',
 352                     'duration_start': '0',
 353                 })
 354                 stream_url = stream_url._replace(query=urllib.parse.urlencode(dict(
 355                     urllib.parse.parse_qsl(stream_url.query, keep_blank_values=True)))).geturl()
 356
 357             formats.append({
 358                 'format_id': vid_format,
 359                 'url': stream_url,
 360                 'height': height,
 361                 'ext': 'mp4',
 362                 'filesize': try_get(stream_data, lambda x: x['size'][vid_format], int)
 363             })
 364         self._sort_formats(formats)
 365
 366         subtitles = {}
 367         for sub in video_data.get('subtitle') or []:
 368             sub_url = sub.get('url')
 369             if not sub_url:
 370                 continue
 371             subtitles.setdefault(sub.get('name'), []).append({
 372                 'url': sub_url,
 373                 'ext': 'srt',
 374             })
 375
 376         title = strip_or_none(video_data.get('synopsis'))
 377         return {
 378             'id': video_id,
 379             'title': title,
 380             'description': video_data.get('description'),
 381             'series': try_get(product_data, lambda x: x['series']['name']),
 382             'episode': title,
 383             'episode_number': int_or_none(video_data.get('number')),
 384             'duration': int_or_none(stream_data.get('duration')),
 385             'thumbnail': url_or_none(video_data.get('cover_image_url')),
 386             'formats': formats,
 387             'subtitles': subtitles,
 388         }