[extractor] Extract thumbnails from JSON-LD (#2195)

author nixxo <redacted>

Sat, 1 Jan 2022 19:50:27 +0000 (20:50 +0100)

committer GitHub <redacted>

Sat, 1 Jan 2022 19:50:27 +0000 (01:20 +0530)
author nixxo <redacted>
Sat, 1 Jan 2022 19:50:27 +0000 (20:50 +0100)
committer GitHub <redacted>
Sat, 1 Jan 2022 19:50:27 +0000 (01:20 +0530)
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py

index cf06dbde46d2130869e8a6d8e6cd0bcbc7701bb3..25bc00e0b94921c46bfb9508cd831ea06537b6a2 100644 (file)
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -208,6 +208,32 @@ def test_search_json_ld_realworld(self):
                  },
                  {'expected_type': 'NewsArticle'},
              ),
+            (
+                # test multiple thumbnails in a list
+                r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
+</script>''',
+                {
+                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                },
+                {},
+            ),
+            (
+                # test single thumbnail
+                r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
+</script>''',
+                {
+                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+                },
+                {},
+            )
          ]
          for html, expected_dict, search_json_ld_kwargs in _TESTS:
              expect_dict(
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 3821b718351db76566c79b19ae910da6b3f43012..1d694293e07681c150e649c9894eacce20f2b933 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1436,7 +1436,8 @@ def extract_video_object(e):
                  'url': url_or_none(e.get('contentUrl')),
                  'title': unescapeHTML(e.get('name')),
                  'description': unescapeHTML(e.get('description')),
-                'thumbnail': url_or_none(e.get('thumbnailUrl') or e.get('thumbnailURL')),
+                'thumbnails': [{'url': url_or_none(url)}
+                               for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))],
                  'duration': parse_duration(e.get('duration')),
                  'timestamp': unified_timestamp(e.get('uploadDate')),
                  # author can be an instance of 'Organization' or 'Person' types.
author	nixxo <redacted>
	Sat, 1 Jan 2022 19:50:27 +0000 (20:50 +0100)
committer	GitHub <redacted>
	Sat, 1 Jan 2022 19:50:27 +0000 (01:20 +0530)
test/test_InfoExtractor.py		patch | blob | blame | history
yt_dlp/extractor/common.py		patch | blob | blame | history