jfr.im git - yt-dlp.git/commitdiff
[extractor/Douyin] Fix extraction from webpage
author bashonly <redacted>
Tue, 27 Jun 2023 21:50:02 +0000 (16:50 -0500)
committer bashonly <redacted>
Tue, 27 Jun 2023 21:50:02 +0000 (16:50 -0500)
Closes #7431
Authored by: bashonly

yt_dlp/extractor/tiktok.py

index 9c6d74007dfb653873517c92ccd8928eb90c6cac..2f491c317027252af585bff977c00a288e50d8c7 100644 (file)
@@ -1015,18 +1015,16 @@ def _real_extract(self, url):
             self.to_screen(f'{e}; trying with webpage')
 
         webpage = self._download_webpage(url, video_id)
-        render_data_json = self._search_regex(
-            r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>(%7B.+%7D)</script>',
-            webpage, 'render data', default=None)
-        if not render_data_json:
+        render_data = self._search_json(
+            r'<script [^>]*\bid=[\'"]RENDER_DATA[\'"][^>]*>', webpage, 'render data', video_id,
+            contains_pattern=r'%7B(?s:.+)%7D', fatal=False, transform_source=compat_urllib_parse_unquote)
+        if not render_data:
             # TODO: Run verification challenge code to generate signature cookies
             cookies = self._get_cookies(self._WEBPAGE_HOST)
             expected = not cookies.get('s_v_web_id') or not cookies.get('ttwid')
             raise ExtractorError(
                 'Fresh cookies (not necessarily logged in) are needed', expected=expected)
 
-        render_data = self._parse_json(
-            render_data_json, video_id, transform_source=compat_urllib_parse_unquote)
         return self._parse_aweme_video_web(get_first(render_data, ('aweme', 'detail')), url, video_id)