[bbc] Fix extraction of news articles (#2811)

author ajj8 <redacted>

Thu, 17 Feb 2022 15:54:53 +0000 (15:54 +0000)

committer GitHub <redacted>

Thu, 17 Feb 2022 15:54:53 +0000 (07:54 -0800)
author ajj8 <redacted>
Thu, 17 Feb 2022 15:54:53 +0000 (15:54 +0000)
committer GitHub <redacted>
Thu, 17 Feb 2022 15:54:53 +0000 (07:54 -0800)
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py

index 85ab478a65dfc50355523dc05feab5be9cab3522..199a3f8e2cf055ceed03b91d6e2607b7de763238 100644 (file)
--- a/yt_dlp/extractor/bbc.py
+++ b/yt_dlp/extractor/bbc.py
@@ -1171,9 +1171,9 @@ def _real_extract(self, url):
                  return self.playlist_result(
                      entries, playlist_id, playlist_title, playlist_description)
  
-        initial_data = self._parse_json(self._search_regex(
-            r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
-            'preload state', default='{}'), playlist_id, fatal=False)
+        initial_data = self._parse_json(self._parse_json(self._search_regex(
+            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage,
+            'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False)
          if initial_data:
              def parse_media(media):
                  if not media:
@@ -1214,7 +1214,7 @@ def parse_media(media):
                  if name == 'media-experience':
                      parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                  elif name == 'article':
-                    for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+                    for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []):
                          if block.get('type') != 'media':
                              continue
                          parse_media(block.get('model'))
author	ajj8 <redacted>
	Thu, 17 Feb 2022 15:54:53 +0000 (15:54 +0000)
committer	GitHub <redacted>
	Thu, 17 Feb 2022 15:54:53 +0000 (07:54 -0800)