]>
Commit | Line | Data |
---|---|---|
c02ec7d4 | 1 | from .common import InfoExtractor |
998dffb5 G |
2 | from ..utils import int_or_none, parse_iso8601, str_or_none, url_or_none |
3 | from ..utils.traversal import traverse_obj | |
c02ec7d4 | 4 | |
5 | ||
998dffb5 G |
class CNBCVideoIE(InfoExtractor):
    # Extractor for video pages under cnbc.com/video/; the URL slug (the
    # final path component without ".html") is captured as the display id.
    _VALID_URL = r'https?://(?:www\.)?cnbc\.com/video/(?:[^/?#]+/)+(?P<id>[^./?#&]+)\.html'

    _TESTS = [{
        'url': 'https://www.cnbc.com/video/2023/12/07/mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand.html',
        'info_dict': {
            'ext': 'mp4',
            'id': '107344774',
            'display_id': 'mcdonalds-just-unveiled-cosmcsits-new-spinoff-brand',
            'modified_timestamp': 1702053483,
            'timestamp': 1701977810,
            'channel': 'News Videos',
            'upload_date': '20231207',
            'description': 'md5:882c001d85cb43d7579b514307b3e78b',
            'release_timestamp': 1701977375,
            'modified_date': '20231208',
            'release_date': '20231207',
            'duration': 65,
            'creators': ['Sean Conlon'],
            'title': 'Here\'s a first look at McDonald\'s new spinoff brand, CosMc\'s',
            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107344192-1701894812493-CosMcsskyHero_2336x1040_hero-desktop.jpg?v=1701894855',
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://www.cnbc.com/video/2023/12/08/jim-cramer-shares-his-take-on-seattles-tech-scene.html',
        'info_dict': {
            'creators': ['Jim Cramer'],
            'channel': 'Mad Money with Jim Cramer',
            'description': 'md5:72925be21b952e95eba51178dddf4e3e',
            'duration': 299.0,
            'ext': 'mp4',
            'id': '107345451',
            'display_id': 'jim-cramer-shares-his-take-on-seattles-tech-scene',
            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345481-1702079431MM-B-120823.jpg?v=1702079430',
            'timestamp': 1702080139,
            'title': 'Jim Cramer shares his take on Seattle\'s tech scene',
            'release_date': '20231208',
            'upload_date': '20231209',
            'modified_timestamp': 1702080139,
            'modified_date': '20231209',
            'release_timestamp': 1702073551,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://www.cnbc.com/video/2023/12/08/the-epicenter-of-ai-is-in-seattle-says-jim-cramer.html',
        'info_dict': {
            'creators': ['Jim Cramer'],
            'channel': 'Mad Money with Jim Cramer',
            'description': 'md5:72925be21b952e95eba51178dddf4e3e',
            'duration': 113.0,
            'ext': 'mp4',
            'id': '107345474',
            'display_id': 'the-epicenter-of-ai-is-in-seattle-says-jim-cramer',
            'thumbnail': 'https://image.cnbcfm.com/api/v1/image/107345486-Screenshot_2023-12-08_at_70339_PM.png?v=1702080248',
            'timestamp': 1702080535,
            'title': 'The epicenter of AI is in Seattle, says Jim Cramer',
            'release_timestamp': 1702077347,
            'modified_timestamp': 1702080535,
            'release_date': '20231208',
            'upload_date': '20231209',
            'modified_date': '20231209',
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }]

    def _real_extract(self, url):
        """Build the info dict for a CNBC video page.

        The page embeds a ``window.__s_data`` JSON blob; the first layout
        module named ``clipPlayer`` supplies the playback URL and most of
        the metadata.  JSON-LD found in the page is merged in as well.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        data = self._search_json(r'window\.__s_data=', webpage, 'video data', display_id)

        # With get_all=False the traversal yields the first match or None,
        # so fall back to an empty dict to keep the lookups below safe.
        player_data = traverse_obj(data, (
            'page', 'page', 'layout', ..., 'columns', ..., 'modules',
            lambda _, v: v['name'] == 'clipPlayer', 'data', {dict}), get_all=False) or {}

        playback_url = player_data.get('playbackURL')
        if not playback_url:
            # Previously this surfaced as a bare TypeError/KeyError when the
            # player module or its playback URL was missing from the page.
            self.raise_no_formats('Unable to extract video playback URL', video_id=display_id)
        formats = self._extract_akamai_formats(playback_url, display_id) if playback_url else []

        # Later entries override earlier ones: JSON-LD values replace the
        # slug-based defaults, and player data replaces JSON-LD values.
        return {
            'id': display_id,
            'display_id': display_id,
            'formats': formats,
            **self._search_json_ld(webpage, display_id, fatal=False),
            **traverse_obj(player_data, {
                'id': ('id', {str_or_none}),
                'title': ('title', {str}),
                'description': ('description', {str}),
                'creators': ('author', ..., 'name', {str}),
                'timestamp': ('datePublished', {parse_iso8601}),
                'release_timestamp': ('uploadDate', {parse_iso8601}),
                'modified_timestamp': ('dateLastPublished', {parse_iso8601}),
                'thumbnail': ('thumbnail', {url_or_none}),
                'duration': ('duration', {int_or_none}),
                'channel': ('section', 'title', {str}),
            }),
        }