[ie/afreecatv:live] Add `cdn` extractor-arg (#9666)

[yt-dlp.git] / yt_dlp / extractor / afreecatv.py
diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py

index 2c33c90dbb6e65d48ae75615cc93867860b7a20a..3e5738f6ab6cb919e69d5cac156cc6bfd9aa55f7 100644 (file)
--- a/yt_dlp/extractor/afreecatv.py
+++ b/yt_dlp/extractor/afreecatv.py
@@ -8,9 +8,11 @@
      determine_ext,
      filter_dict,
      int_or_none,
+    orderedSet,
      unified_timestamp,
      url_or_none,
      urlencode_postdata,
+    urljoin,
  )
  from ..utils.traversal import traverse_obj
  
@@ -276,6 +278,47 @@ class AfreecaTVLiveIE(AfreecaTVBaseIE):
      }]
  
      _LIVE_API_URL = 'https://live.afreecatv.com/afreeca/player_live_api.php'
+    _WORKING_CDNS = [
+        'gcp_cdn',  # live-global-cdn-v02.afreecatv.com
+        'gs_cdn_pc_app',  # pc-app.stream.afreecatv.com
+        'gs_cdn_mobile_web',  # mobile-web.stream.afreecatv.com
+        'gs_cdn_pc_web',  # pc-web.stream.afreecatv.com
+    ]
+    _BAD_CDNS = [
+        'gs_cdn',  # chromecast.afreeca.gscdn.com (cannot resolve)
+        'gs_cdn_chromecast',  # chromecast.stream.afreecatv.com (HTTP Error 400)
+        'azure_cdn',  # live-global-cdn-v01.afreecatv.com (cannot resolve)
+        'aws_cf',  # live-global-cdn-v03.afreecatv.com (cannot resolve)
+        'kt_cdn',  # kt.stream.afreecatv.com (HTTP Error 400)
+    ]
+
+    def _extract_formats(self, channel_info, broadcast_no, aid):
+        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
+
+        # If user has not passed CDN IDs, try API-provided CDN ID followed by other working CDN IDs
+        default_cdn_ids = orderedSet([
+            *traverse_obj(channel_info, ('CDN', {str}, all, lambda _, v: v not in self._BAD_CDNS)),
+            *self._WORKING_CDNS,
+        ])
+        cdn_ids = self._configuration_arg('cdn', default_cdn_ids)
+
+        for attempt, cdn_id in enumerate(cdn_ids, start=1):
+            m3u8_url = traverse_obj(self._download_json(
+                urljoin(stream_base_url, 'broad_stream_assign.html'), broadcast_no,
+                f'Downloading {cdn_id} stream info', f'Unable to download {cdn_id} stream info',
+                fatal=False, query={
+                    'return_type': cdn_id,
+                    'broad_key': f'{broadcast_no}-common-master-hls',
+                }), ('view_url', {url_or_none}))
+            try:
+                return self._extract_m3u8_formats(
+                    m3u8_url, broadcast_no, 'mp4', m3u8_id='hls', query={'aid': aid},
+                    headers={'Referer': 'https://play.afreecatv.com/'})
+            except ExtractorError as e:
+                if attempt == len(cdn_ids):
+                    raise
+                self.report_warning(
+                    f'{e.cause or e.msg}. Retrying... (attempt {attempt} of {len(cdn_ids)})')
  
      def _real_extract(self, url):
          broadcaster_id, broadcast_no = self._match_valid_url(url).group('id', 'bno')
@@ -294,7 +337,7 @@ def _real_extract(self, url):
                  'This livestream is protected by a password, use the --video-password option',
                  expected=True)
  
-        aid = self._download_json(
+        token_info = traverse_obj(self._download_json(
              self._LIVE_API_URL, broadcast_no, 'Downloading access token for stream',
              'Unable to download access token for stream', data=urlencode_postdata(filter_dict({
                  'bno': broadcast_no,
@@ -302,18 +345,17 @@ def _real_extract(self, url):
                  'type': 'aid',
                  'quality': 'master',
                  'pwd': password,
-            })))['CHANNEL']['AID']
-
-        stream_base_url = channel_info.get('RMD') or 'https://livestream-manager.afreecatv.com'
-        stream_info = self._download_json(f'{stream_base_url}/broad_stream_assign.html', broadcast_no, query={
-            # works: gs_cdn_pc_app, gs_cdn_mobile_web, gs_cdn_pc_web
-            'return_type': 'gs_cdn_pc_app',
-            'broad_key': f'{broadcast_no}-common-master-hls',
-        }, note='Downloading metadata for stream', errnote='Unable to download metadata for stream')
-
-        formats = self._extract_m3u8_formats(
-            stream_info['view_url'], broadcast_no, 'mp4', m3u8_id='hls',
-            query={'aid': aid}, headers={'Referer': url})
+            }))), ('CHANNEL', {dict})) or {}
+        aid = token_info.get('AID')
+        if not aid:
+            result = token_info.get('RESULT')
+            if result == 0:
+                raise ExtractorError('This livestream has ended', expected=True)
+            elif result == -6:
+                self.raise_login_required('This livestream is for subscribers only', method='password')
+            raise ExtractorError('Unable to extract access token')
+
+        formats = self._extract_formats(channel_info, broadcast_no, aid)
  
          station_info = traverse_obj(self._download_json(
              'https://st.afreecatv.com/api/get_station_status.php', broadcast_no,