jfr.im git - yt-dlp.git/commitdiff
[bbc] Fix extraction of news articles (#2811)
author: ajj8 <redacted>
Thu, 17 Feb 2022 15:54:53 +0000 (15:54 +0000)
committer: GitHub <redacted>
Thu, 17 Feb 2022 15:54:53 +0000 (07:54 -0800)
Closes #1374

Authored by: ajj8

yt_dlp/extractor/bbc.py

index 85ab478a65dfc50355523dc05feab5be9cab3522..199a3f8e2cf055ceed03b91d6e2607b7de763238 100644 (file)
@@ -1171,9 +1171,9 @@ def _real_extract(self, url):
                 return self.playlist_result(
                     entries, playlist_id, playlist_title, playlist_description)
 
-        initial_data = self._parse_json(self._search_regex(
-            r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
-            'preload state', default='{}'), playlist_id, fatal=False)
+        initial_data = self._parse_json(self._parse_json(self._search_regex(
+            r'window\.__INITIAL_DATA__\s*=\s*("{.+?}");', webpage,
+            'preload state', default='"{}"'), playlist_id, fatal=False), playlist_id, fatal=False)
         if initial_data:
             def parse_media(media):
                 if not media:
@@ -1214,7 +1214,7 @@ def parse_media(media):
                 if name == 'media-experience':
                     parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
                 elif name == 'article':
-                    for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
+                    for block in (try_get(resp, lambda x: x['data']['content']['model']['blocks'], list) or []):
                         if block.get('type') != 'media':
                             continue
                         parse_media(block.get('model'))