[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)

[yt-dlp.git] / yt_dlp / extractor / sonyliv.py
diff --git a/yt_dlp/extractor/sonyliv.py b/yt_dlp/extractor/sonyliv.py

index ec7b4f37f69b757b7c2ec46ba1d91779a5fa6bf0..a0a051e97244d702f35045798669526b83b4cdd6 100644 (file)
--- a/yt_dlp/extractor/sonyliv.py
+++ b/yt_dlp/extractor/sonyliv.py
@@ -1,16 +1,20 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
+import datetime as dt
+import itertools
+import json
+import math
+import random
  import time
  import uuid
  
  from .common import InfoExtractor
-from ..compat import compat_HTTPError
+from ..networking.exceptions import HTTPError
  from ..utils import (
      ExtractorError,
      int_or_none,
-    try_get,
+    jwt_decode_hs256,
+    try_call,
  )
+from ..utils.traversal import traverse_obj
  
  
  class SonyLIVIE(InfoExtractor):
@@ -56,37 +60,93 @@ class SonyLIVIE(InfoExtractor):
          'only_matching': True,
      }]
      _GEO_COUNTRIES = ['IN']
-    _TOKEN = None
+    _HEADERS = {}
+    _LOGIN_HINT = 'Use "--username <mobile_number>" to login using OTP or "--username token --password <auth_token>" to login using auth token.'
+    _NETRC_MACHINE = 'sonyliv'
+
+    def _get_device_id(self):
+        """Generate the pseudo-random value sent as the 'device_id' header.
+
+        Fills the 32-character template 'xxxxxxxxxxxx4xxxyxxxxxxxxxxxxxxx' with
+        hexadecimal digits (each 'x' becomes a random hex digit, the 'y' becomes
+        one of 8, 9, a or b) and appends '-<current epoch milliseconds>'.
+        """
+        e = int(time.time() * 1000)
+        t = list('xxxxxxxxxxxx4xxxyxxxxxxxxxxxxxxx')
+        for i, c in enumerate(t):
+            # Mix the low hex digit of the timestamp with random noise,
+            # then shift the timestamp right by one hex digit
+            n = int((e + 16 * random.random()) % 16)
+            e //= 16
+            if c == 'x':
+                # Must be formatted as hex: str(n) would emit two decimal
+                # characters for 10-15 and break the fixed-width template
+                t[i] = f'{n:x}'
+            elif c == 'y':
+                t[i] = f'{3 & n | 8:x}'
+        return ''.join(t) + '-' + str(int(time.time() * 1000))
+
+    def _perform_login(self, username, password):
+        """Log in with an auth token, or with a mobile number and an OTP.
+
+        Always sets 'device_id' and 'content-type' in the class-level _HEADERS
+        dict; on success also sets 'authorization'.
+
+        @param username  'token' (case-insensitive) to use `password` as the auth
+                         token, otherwise a 10-digit mobile number for OTP login
+        @param password  The auth token (must be decodable by jwt_decode_hs256)
+                         when username is 'token'; not used for OTP login
+        Raises ExtractorError if the credentials match neither form, or if the
+        API answers the OTP request/verification with resultCode 'KO'.
+        """
+        self._HEADERS['device_id'] = self._get_device_id()
+        self._HEADERS['content-type'] = 'application/json'
+
+        if username.lower() == 'token' and try_call(lambda: jwt_decode_hs256(password)):
+            self._HEADERS['authorization'] = password
+            self.report_login()
+            return
+        elif len(username) != 10 or not username.isdigit():
+            # Bad user input, not an extractor bug: mark as expected like the API errors below
+            raise ExtractorError(f'Invalid username/password; {self._LOGIN_HINT}', expected=True)
+
+        self.report_login()
+        otp_request_json = self._download_json(
+            'https://apiv2.sonyliv.com/AGL/1.6/A/ENG/WEB/IN/HR/CREATEOTP-V2',
+            None, note='Sending OTP', headers=self._HEADERS, data=json.dumps({
+                'mobileNumber': username,
+                'channelPartnerID': 'MSMIND',
+                'country': 'IN',
+                # Millisecond precision; the previous '%S.%MZ' format repeated the minute
+                # in the fractional-seconds position.
+                # NOTE(review): this is local time labelled 'Z' (as before) - confirm whether UTC is expected
+                'timestamp': dt.datetime.now().isoformat(timespec='milliseconds') + 'Z',
+                'otpSize': 6,
+                'loginType': 'REGISTERORSIGNIN',
+                'isMobileMandatory': True,
+            }).encode())
+        if otp_request_json['resultCode'] == 'KO':
+            raise ExtractorError(otp_request_json['message'], expected=True)
+
+        otp_verify_json = self._download_json(
+            'https://apiv2.sonyliv.com/AGL/2.0/A/ENG/WEB/IN/HR/CONFIRMOTP-V2',
+            None, note='Verifying OTP', headers=self._HEADERS, data=json.dumps({
+                'channelPartnerID': 'MSMIND',
+                'mobileNumber': username,
+                'country': 'IN',
+                'otp': self._get_tfa_info('OTP'),
+                'dmaId': 'IN',
+                'ageConfirmation': True,
+                'timestamp': dt.datetime.now().isoformat(timespec='milliseconds') + 'Z',
+                'isMobileMandatory': True,
+            }).encode())
+        if otp_verify_json['resultCode'] == 'KO':
+            # Report the message of the verification response, not of the earlier OTP request
+            raise ExtractorError(otp_verify_json['message'], expected=True)
+        self._HEADERS['authorization'] = otp_verify_json['resultObj']['accessToken']
  
      def _call_api(self, version, path, video_id):
+        """Download JSON from the AGL API and return its 'resultObj' value.
+
+        The request is sent with the class-level _HEADERS dict. HTTP 406 with
+        the subscription message is turned into a login-required error, and
+        HTTP 403 into a geo-restriction error or an ExtractorError carrying the
+        API message; any other ExtractorError is re-raised unchanged.
+        """
-        headers = {}
-        if self._TOKEN:
-            headers['security_token'] = self._TOKEN
          try:
              return self._download_json(
-                'https://apiv2.sonyliv.com/AGL/%s/A/ENG/WEB/%s' % (version, path),
-                video_id, headers=headers)['resultObj']
+                f'https://apiv2.sonyliv.com/AGL/{version}/A/ENG/WEB/{path}',
+                video_id, headers=self._HEADERS)['resultObj']
          except ExtractorError as e:
-            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
+            # 406 with this exact message is reported to the user as "login required"
+            if isinstance(e.cause, HTTPError) and e.cause.status == 406 and self._parse_json(
+                    e.cause.response.read().decode(), video_id)['message'] == 'Please subscribe to watch this content':
+                self.raise_login_required(self._LOGIN_HINT, method=None)
+            # 403 carries a JSON body whose 'message' tells geo-blocking apart from other refusals
+            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                  message = self._parse_json(
-                    e.cause.read().decode(), video_id)['message']
+                    e.cause.response.read().decode(), video_id)['message']
                  if message == 'Geoblocked Country':
                      self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
                  raise ExtractorError(message)
              raise
  
-    def _real_initialize(self):
-        self._TOKEN = self._call_api('1.4', 'ALL/GETTOKEN', None)
+    def _initialize_pre_login(self):
+        # Fetch the security token and keep it in _HEADERS, which _call_api and _perform_login send with their requests
+        self._HEADERS['security_token'] = self._call_api('1.4', 'ALL/GETTOKEN', None)
  
      def _real_extract(self, url):
          video_id = self._match_id(url)
          content = self._call_api(
              '1.5', 'IN/CONTENT/VIDEOURL/VOD/' + video_id, video_id)
          if not self.get_param('allow_unplayable_formats') and content.get('isEncrypted'):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            self.report_drm(video_id)
          dash_url = content['videoURL']
          headers = {
-            'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000)
+            'x-playback-session-id': '%s-%d' % (uuid.uuid4().hex, time.time() * 1000),
          }
          formats = self._extract_mpd_formats(
              dash_url, video_id, mpd_id='dash', headers=headers, fatal=False)
@@ -95,12 +155,18 @@ def _real_extract(self, url):
              video_id, 'mp4', m3u8_id='hls', headers=headers, fatal=False))
          for f in formats:
              f.setdefault('http_headers', {}).update(headers)
-        self._sort_formats(formats)
  
          metadata = self._call_api(
              '1.6', 'IN/DETAIL/' + video_id, video_id)['containers'][0]['metadata']
          title = metadata['episodeTitle']
-
+        subtitles = {}
+        for sub in content.get('subtitle', []):
+            sub_url = sub.get('subtitleUrl')
+            if not sub_url:
+                continue
+            subtitles.setdefault(sub.get('subtitleLanguageName', 'ENG'), []).append({
+                'url': sub_url,
+            })
          return {
              'id': video_id,
              'title': title,
@@ -113,21 +179,26 @@ def _real_extract(self, url):
              'series': metadata.get('title'),
              'episode_number': int_or_none(metadata.get('episodeNumber')),
              'release_year': int_or_none(metadata.get('year')),
+            'subtitles': subtitles,
          }
  
  
  class SonyLIVSeriesIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})$'
+    _VALID_URL = r'https?://(?:www\.)?sonyliv\.com/shows/[^/?#&]+-(?P<id>\d{10})/?(?:$|[?#])'
      _TESTS = [{
          'url': 'https://www.sonyliv.com/shows/adaalat-1700000091',
-        'playlist_mincount': 456,
+        'playlist_mincount': 452,
          'info_dict': {
              'id': '1700000091',
          },
+    }, {
+        'url': 'https://www.sonyliv.com/shows/beyhadh-1700000007/',
+        'playlist_mincount': 358,
+        'info_dict': {
+            'id': '1700000007',
+        },
      }]
-    _API_SHOW_URL = "https://apiv2.sonyliv.com/AGL/1.9/R/ENG/WEB/IN/DL/DETAIL/{}?kids_safe=false&from=0&to=49"
-    _API_EPISODES_URL = "https://apiv2.sonyliv.com/AGL/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{}?from=0&to=1000&orderBy=episodeNumber&sortOrder=asc"
-    _API_SECURITY_URL = 'https://apiv2.sonyliv.com/AGL/1.4/A/ENG/WEB/ALL/GETTOKEN'
+    _API_BASE = 'https://apiv2.sonyliv.com/AGL'
  
      def _entries(self, show_id):
          headers = {
@@ -135,19 +206,34 @@ def _entries(self, show_id):
              'Referer': 'https://www.sonyliv.com',
          }
          headers['security_token'] = self._download_json(
-            self._API_SECURITY_URL, video_id=show_id, headers=headers,
-            note='Downloading security token')['resultObj']
-        seasons = try_get(
-            self._download_json(self._API_SHOW_URL.format(show_id), video_id=show_id, headers=headers),
-            lambda x: x['resultObj']['containers'][0]['containers'], list)
-        for season in seasons or []:
-            season_id = season['id']
-            episodes = try_get(
-                self._download_json(self._API_EPISODES_URL.format(season_id), video_id=season_id, headers=headers),
-                lambda x: x['resultObj']['containers'][0]['containers'], list)
-            for episode in episodes or []:
-                video_id = episode.get('id')
-                yield self.url_result('sonyliv:%s' % video_id, ie=SonyLIVIE.ie_key(), video_id=video_id)
+            f'{self._API_BASE}/1.4/A/ENG/WEB/ALL/GETTOKEN', show_id,
+            'Downloading security token', headers=headers)['resultObj']
+        seasons = traverse_obj(self._download_json(
+            f'{self._API_BASE}/1.9/R/ENG/WEB/IN/DL/DETAIL/{show_id}', show_id,
+            'Downloading series JSON', headers=headers, query={
+                'kids_safe': 'false',
+                'from': '0',
+                'to': '49',
+            }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
+        for season in seasons:
+            season_id = str(season['id'])
+            note = traverse_obj(season, ('metadata', 'title', {str})) or 'season'
+            cursor = 0
+            for page_num in itertools.count(1):
+                episodes = traverse_obj(self._download_json(
+                    f'{self._API_BASE}/1.4/R/ENG/WEB/IN/CONTENT/DETAIL/BUNDLE/{season_id}',
+                    season_id, f'Downloading {note} page {page_num} JSON', headers=headers, query={
+                        'from': str(cursor),
+                        'to': str(cursor + 99),
+                        'orderBy': 'episodeNumber',
+                        'sortOrder': 'asc',
+                    }), ('resultObj', 'containers', 0, 'containers', lambda _, v: int_or_none(v['id'])))
+                if not episodes:
+                    break
+                for episode in episodes:
+                    video_id = str(episode['id'])
+                    yield self.url_result(f'sonyliv:{video_id}', SonyLIVIE, video_id)
+                cursor += 100
  
      def _real_extract(self, url):
          show_id = self._match_id(url)