]> jfr.im git - yt-dlp.git/commitdiff
[extractor/amazonminitv] Cleanup 48652590ec401f4e747a5e51552cdcac20744aa1
authorpukkandan <redacted>
Tue, 29 Nov 2022 02:20:58 +0000 (07:50 +0530)
committerpukkandan <redacted>
Tue, 29 Nov 2022 02:24:07 +0000 (07:54 +0530)
yt_dlp/extractor/amazonminitv.py

index 793fac2e4df67f4eae3c99cddf0f7e449dbb22ce..7309968537adf872303f95283463ae6b104c8190 100644 (file)
@@ -4,16 +4,43 @@
 from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
 
 
-class AmazonMiniTVIE(InfoExtractor):
+class AmazonMiniTVBaseIE(InfoExtractor):
+    def _real_initialize(self):
+        self._download_webpage(
+            'https://www.amazon.in/minitv', None,
+            note='Fetching guest session cookies')
+        AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
+
+    def _call_api(self, asin, data=None, note=None):
+        device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
+        if data:
+            data['variables'].update({
+                'contentType': 'VOD',
+                'sessionIdToken': self.session_id,
+                **device,
+            })
+
+        resp = self._download_json(
+            f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}',
+            asin, note=note, headers={'Content-Type': 'application/json'},
+            data=json.dumps(data).encode() if data else None,
+            query=None if data else {
+                'deviceType': 'A1WMMUXPCUJL4N',
+                'contentId': asin,
+                **device,
+            })
+
+        if resp.get('errors'):
+            raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
+        elif not data:
+            return resp
+        return resp['data'][data['operationName']]
+
+
+class AmazonMiniTVIE(AmazonMiniTVBaseIE):
     _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
-    _HEADERS = {
-        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36',
-    }
-    _CLIENT_ID = 'ATVIN'
-    _DEVICE_LOCALE = 'en_GB'
     _TESTS = [{
         'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
-        'md5': '0045a5ea38dddd4de5a5fcec7274b476',
         'info_dict': {
             'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
             'ext': 'mp4',
@@ -24,11 +51,7 @@ class AmazonMiniTVIE(InfoExtractor):
             'release_timestamp': 1644710400,
             'release_date': '20220213',
             'duration': 846,
-            'chapters': [{
-                'start_time': 815.0,
-                'end_time': 846,
-                'title': 'End Credits',
-            }],
+            'chapters': 'count:2',
             'series': 'Couple Goals',
             'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
             'season': 'Season 3',
@@ -40,7 +63,6 @@ class AmazonMiniTVIE(InfoExtractor):
         },
     }, {
         'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
-        'md5': '9a977bffd5d99c4dd2a32b360aee1863',
         'info_dict': {
             'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
             'ext': 'mp4',
@@ -63,6 +85,7 @@ class AmazonMiniTVIE(InfoExtractor):
         'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
         'only_matching': True,
     }]
+
     _GRAPHQL_QUERY_CONTENT = '''
 query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
   content(
@@ -107,68 +130,13 @@ class AmazonMiniTVIE(InfoExtractor):
   }
 }'''
 
-    def _call_api(self, asin, data=None, note=None):
-        query = {}
-        headers = self._HEADERS.copy()
-        if data:
-            name = 'graphql'
-            data['variables'].update({
-                'clientId': self._CLIENT_ID,
-                'contentType': 'VOD',
-                'deviceLocale': self._DEVICE_LOCALE,
-                'sessionIdToken': self.session_id,
-            })
-            headers.update({'Content-Type': 'application/json'})
-        else:
-            name = 'prs'
-            query.update({
-                'clientId': self._CLIENT_ID,
-                'deviceType': 'A1WMMUXPCUJL4N',
-                'contentId': asin,
-                'deviceLocale': self._DEVICE_LOCALE,
-            })
-
-        resp = self._download_json(
-            f'https://www.amazon.in/minitv/api/web/{name}',
-            asin, query=query, data=json.dumps(data).encode() if data else None,
-            headers=headers, note=note)
-
-        if 'errors' in resp:
-            raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
-
-        if data:
-            resp = resp['data'][data['operationName']]
-        return resp
-
-    def _real_initialize(self):
-        # Download webpage to get the required guest session cookies
-        self._download_webpage(
-            'https://www.amazon.in/minitv',
-            None,
-            headers=self._HEADERS,
-            note='Downloading webpage')
-
-        self.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value
-
     def _real_extract(self, url):
         asin = f'amzn1.dv.gti.{self._match_id(url)}'
-
-        title_info = self._call_api(
-            asin, data={
-                'operationName': 'content',
-                'variables': {
-                    'contentId': asin,
-                },
-                'query': self._GRAPHQL_QUERY_CONTENT,
-            },
-            note='Downloading title info')
-
         prs = self._call_api(asin, note='Downloading playback info')
 
-        formats = []
-        subtitles = {}
+        formats, subtitles = [], {}
         for type_, asset in prs['playbackAssets'].items():
-            if not isinstance(asset, dict):
+            if not traverse_obj(asset, 'manifestUrl'):
                 continue
             if type_ == 'hls':
                 m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
@@ -181,14 +149,16 @@ def _real_extract(self, url):
                     asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
                 formats.extend(mpd_fmts)
                 subtitles = self._merge_subtitles(subtitles, mpd_subs)
+            else:
+                self.report_warning(f'Unknown asset type: {type_}')
 
-        duration = traverse_obj(title_info, ('description', 'contentLengthInSeconds'))
+        title_info = self._call_api(
+            asin, note='Downloading title info', data={
+                'operationName': 'content',
+                'variables': {'contentId': asin},
+                'query': self._GRAPHQL_QUERY_CONTENT,
+            })
         credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
-        chapters = [{
-            'start_time': credits_time,
-            'end_time': duration + credits_time,  # FIXME: I suppose this is correct
-            'title': 'End Credits',
-        }] if credits_time and duration else []
         is_episode = title_info.get('vodType') == 'EPISODE'
 
         return {
@@ -203,8 +173,11 @@ def _real_extract(self, url):
             } for type_, url in (title_info.get('images') or {}).items()],
             'description': traverse_obj(title_info, ('description', 'synopsis')),
             'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
-            'duration': duration,
-            'chapters': chapters,
+            'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
+            'chapters': [{
+                'start_time': credits_time,
+                'title': 'End Credits',
+            }] if credits_time else [],
             'series': title_info.get('seriesName'),
             'series_id': title_info.get('seriesId'),
             'season_number': title_info.get('seasonNumber'),
@@ -215,7 +188,7 @@ def _real_extract(self, url):
         }
 
 
-class AmazonMiniTVSeasonIE(AmazonMiniTVIE):
+class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
     IE_NAME = 'amazonminitv:season'
     _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
     IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix'
@@ -229,6 +202,7 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVIE):
         'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
         'only_matching': True,
     }]
+
     _GRAPHQL_QUERY = '''
 query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
   getEpisodes(
@@ -258,25 +232,22 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVIE):
 
     def _entries(self, asin):
         season_info = self._call_api(
-            asin,
-            data={
+            asin, note='Downloading season info', data={
                 'operationName': 'getEpisodes',
-                'variables': {
-                    'episodeOrSeasonId': asin,
-                },
+                'variables': {'episodeOrSeasonId': asin},
                 'query': self._GRAPHQL_QUERY,
-            },
-            note='Downloading season info')
+            })
 
         for episode in season_info['episodes']:
-            yield self.url_result(f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
+            yield self.url_result(
+                f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])
 
     def _real_extract(self, url):
         asin = f'amzn1.dv.gti.{self._match_id(url)}'
-        return self.playlist_result(self._entries(asin), playlist_id=asin)
+        return self.playlist_result(self._entries(asin), asin)
 
 
-class AmazonMiniTVSeriesIE(AmazonMiniTVIE):
+class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
     IE_NAME = 'amazonminitv:series'
     _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
     _TESTS = [{
@@ -289,6 +260,7 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVIE):
         'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
         'only_matching': True,
     }]
+
     _GRAPHQL_QUERY = '''
 query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
   getSeasons(
@@ -304,19 +276,15 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVIE):
 
     def _entries(self, asin):
         season_info = self._call_api(
-            asin,
-            data={
+            asin, note='Downloading series info', data={
                 'operationName': 'getSeasons',
-                'variables': {
-                    'episodeOrSeasonOrSeriesId': asin,
-                },
+                'variables': {'episodeOrSeasonOrSeriesId': asin},
                 'query': self._GRAPHQL_QUERY,
-            },
-            note='Downloading series info')
+            })
 
         for season in season_info['seasons']:
             yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId'])
 
     def _real_extract(self, url):
         asin = f'amzn1.dv.gti.{self._match_id(url)}'
-        return self.playlist_result(self._entries(asin), playlist_id=asin)
+        return self.playlist_result(self._entries(asin), asin)