jfr.im git - yt-dlp.git/commitdiff
Fix bugs related to `sanitize_info`
author pukkandan <redacted>
Sat, 7 Aug 2021 15:46:55 +0000 (21:16 +0530)
committer pukkandan <redacted>
Sat, 7 Aug 2021 15:46:55 +0000 (21:16 +0530)
Related: https://github.com/yt-dlp/yt-dlp/commit/8012d892bd38af731357a61e071e0a0d01bc41b4#r54555230

test/test_YoutubeDL.py
yt_dlp/YoutubeDL.py
yt_dlp/utils.py

index 9a0b286e24f82ea315502af6a63843ef7988732f..1e086510229dd03f1a28f5465e732265aa56a7f5 100644 (file)
@@ -18,7 +18,7 @@
 from yt_dlp.extractor import YoutubeIE
 from yt_dlp.extractor.common import InfoExtractor
 from yt_dlp.postprocessor.common import PostProcessor
-from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func
+from yt_dlp.utils import ExtractorError, int_or_none, match_filter_func, LazyList
 
 TEST_URL = 'http://localhost/sample.mp4'
 
@@ -678,10 +678,17 @@ def test(tmpl, expected, *, info=None, **params):
                 self.assertEqual(out, expected[0])
                 self.assertEqual(fname, expected[1])
 
+        # Side-effects
+        original_infodict = dict(self.outtmpl_info)
+        test('foo.bar', 'foo.bar')
+        original_infodict['epoch'] = self.outtmpl_info.get('epoch')
+        self.assertTrue(isinstance(original_infodict['epoch'], int))
+        test('%(epoch)d', int_or_none)
+        self.assertEqual(original_infodict, self.outtmpl_info)
+
         # Auto-generated fields
         test('%(id)s.%(ext)s', '1234.mp4')
         test('%(duration_string)s', ('27:46:40', '27-46-40'))
-        test('%(epoch)d', int_or_none)
         test('%(resolution)s', '1080p')
         test('%(playlist_index)s', '001')
         test('%(autonumber)s', '00001')
@@ -774,6 +781,12 @@ def test(tmpl, expected, *, info=None, **params):
         test('%(formats.0.id.-1+id)f', '1235.000000')
         test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
 
+        # Laziness
+        def gen():
+            yield from range(5)
+            raise self.assertTrue(False, 'LazyList should not be evaluated till here')
+        test('%(key.4)s', '4', info={'key': LazyList(gen())})
+
         # Empty filename
         test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4')
         # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4'))  # fixme
index 978f43054481c7ec42d3de70a73432b36451a0e5..50380872721eb138bd16288c6d09448e1223b052 100644 (file)
@@ -887,14 +887,16 @@ def validate_outtmpl(cls, outtmpl):
 
     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
         """ Make the template and info_dict suitable for substitution : ydl.outtmpl_escape(outtmpl) % info_dict """
-        info_dict = dict(info_dict)
+        info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
         na = self.params.get('outtmpl_na_placeholder', 'NA')
 
+        info_dict = dict(info_dict)  # Do not sanitize so as not to consume LazyList
+        for key in ('__original_infodict', '__postprocessors'):
+            info_dict.pop(key, None)
         info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
             if info_dict.get('duration', None) is not None
             else None)
-        info_dict['epoch'] = int(time.time())
         info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
         if info_dict.get('resolution') is None:
             info_dict['resolution'] = self.format_resolution(info_dict, default=None)
@@ -964,6 +966,11 @@ def get_value(mdict):
 
             return value
 
+        def _dumpjson_default(obj):
+            if isinstance(obj, (set, LazyList)):
+                return list(obj)
+            raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
+
         def create_key(outer_mobj):
             if not outer_mobj.group('has_key'):
                 return f'%{outer_mobj.group(0)}'
@@ -988,7 +995,7 @@ def create_key(outer_mobj):
             if fmt[-1] == 'l':
                 value, fmt = ', '.join(variadic(value)), str_fmt
             elif fmt[-1] == 'j':
-                value, fmt = json.dumps(value), str_fmt
+                value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt
             elif fmt[-1] == 'q':
                 value, fmt = compat_shlex_quote(str(value)), str_fmt
             elif fmt[-1] == 'c':
@@ -2386,7 +2393,7 @@ def print_optional(field):
 
         if self.params.get('forcejson', False):
             self.post_extract(info_dict)
-            self.to_stdout(json.dumps(self.sanitize_info(info_dict), default=repr))
+            self.to_stdout(json.dumps(self.sanitize_info(info_dict)))
 
     def dl(self, name, info, subtitle=False, test=False):
 
@@ -2861,7 +2868,7 @@ def download(self, url_list):
             else:
                 if self.params.get('dump_single_json', False):
                     self.post_extract(res)
-                    self.to_stdout(json.dumps(self.filter_requested_info(res), default=repr))
+                    self.to_stdout(json.dumps(self.sanitize_info(res)))
 
         return self._download_retcode
 
@@ -2885,15 +2892,18 @@ def download_with_info_file(self, info_filename):
     @staticmethod
     def sanitize_info(info_dict, remove_private_keys=False):
         ''' Sanitize the infodict for converting to json '''
-        remove_keys = ['__original_infodict']  # Always remove this since this may contain a copy of the entire dict
+        info_dict.setdefault('epoch', int(time.time()))
+        remove_keys = {'__original_infodict'}  # Always remove this since this may contain a copy of the entire dict
         keep_keys = ['_type'],  # Always keep this to facilitate load-info-json
         if remove_private_keys:
-            remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
+            remove_keys |= {
+                'requested_formats', 'requested_subtitles', 'requested_entries',
+                'filepath', 'entries', 'original_url', 'playlist_autonumber',
+            }
             empty_values = (None, {}, [], set(), tuple())
             reject = lambda k, v: k not in keep_keys and (
                 k.startswith('_') or k in remove_keys or v in empty_values)
         else:
-            info_dict['epoch'] = int(time.time())
             reject = lambda k, v: k in remove_keys
         filter_fn = lambda obj: (
             list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
index a6e61313984be34813af02a53fb692a3a4b675f1..fd13febd6da5ef0822231661f4dca579fda43833 100644 (file)
@@ -1836,7 +1836,7 @@ def write_json_file(obj, fn):
 
     try:
         with tf:
-            json.dump(obj, tf, default=repr)
+            json.dump(obj, tf)
         if sys.platform == 'win32':
             # Need to remove existing file on Windows, else os.rename raises
             # WindowsError or FileExistsError.