]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/cbssports.py
[generic] Extract subtitles from video.js (#3156)
[yt-dlp.git] / yt_dlp / extractor / cbssports.py
1 from __future__ import unicode_literals
2
3
4 # from .cbs import CBSBaseIE
5 from .common import InfoExtractor
6 from ..utils import (
7 int_or_none,
8 try_get,
9 )
10
11
12 # class CBSSportsEmbedIE(CBSBaseIE):
class CBSSportsEmbedIE(InfoExtractor):
    """Extractor for the player embed pages on cbssports.com and embed.247sports.com.

    The embed URL carries, percent-encoded, either an ``ids`` UUID or a numeric
    ``pcid``; whichever one matched is sent to the CBS Sports video API.
    """

    IE_NAME = 'cbssports:embed'
    _VALID_URL = r'''(?ix)https?://(?:(?:www\.)?cbs|embed\.247)sports\.com/player/embed.+?
        (?:
            ids%3D(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})|
            pcid%3D(?P<pcid>\d+)
        )'''
    _TESTS = [{
        'url': 'https://www.cbssports.com/player/embed/?args=player_id%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26ids%3Db56c03a6-231a-4bbe-9c55-af3c8a8e9636%26resizable%3D1%26autoplay%3Dtrue%26domain%3Dcbssports.com%26comp_ads_enabled%3Dfalse%26watchAndRead%3D0%26startTime%3D0%26env%3Dprod',
        'only_matching': True,
    }, {
        'url': 'https://embed.247sports.com/player/embed/?args=%3fplayer_id%3d1827823171591%26channel%3dcollege-football-recruiting%26pcid%3d1827823171591%26width%3d640%26height%3d360%26autoplay%3dTrue%26comp_ads_enabled%3dFalse%26uvpc%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_v4%2526partner%253d247%26uvpc_m%3dhttps%253a%252f%252fwww.cbssports.com%252fapi%252fcontent%252fvideo%252fconfig%252f%253fcfg%253duvp_247sports_m_v4%2526partner_m%253d247_mobile%26utag%3d247sportssite%26resizable%3dTrue',
        'only_matching': True,
    }]

    # Disabled alternative (relies on the commented-out CBSBaseIE import), kept for reference:
    # def _extract_video_info(self, filter_query, video_id):
    #     return self._extract_feed_info('dJ5BDC', 'VxxJg8Ymh8sE', filter_query, video_id)

    def _real_extract(self, url):
        """Look the video up in the CBS Sports video API and build its info dict.

        The first entry of the API response is used. ``id``, ``title`` and the
        first file URL are indexed directly, so a response lacking them raises
        instead of yielding a partial result; every other field is optional.
        """
        uuid, pcid = self._match_valid_url(url).group('id', 'pcid')
        # Exactly one of the two groups matched; it is both the lookup key
        # shown in progress output and the API query parameter.
        if uuid:
            lookup_key, api_query = uuid, {'id': uuid}
        else:
            lookup_key, api_query = pcid, {'pcid': pcid}

        api_response = self._download_json(
            'https://www.cbssports.com/api/content/video/',
            lookup_key, query=api_query)
        video = api_response[0]

        video_id = video['id']
        title = video['title']
        metadata = video.get('metaData') or {}
        # Disabled alternatives (see _extract_video_info above), kept for reference:
        # return self._extract_video_info('byId=%d' % metadata['mpxOutletId'], video_id)
        # return self._extract_video_info('byGuid=' + metadata['mpxRefId'], video_id)

        manifest_url = metadata['files'][0]['url']
        formats = self._extract_m3u8_formats(
            manifest_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False)
        self._sort_formats(formats)

        # A thumbnail entry is only emitted when the image object has a path.
        image = video.get('image') or {}
        image_path = image.get('path')
        thumbnails = None
        if image_path:
            thumbnail = {
                'url': image_path,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
                'filesize': int_or_none(image.get('size')),
            }
            thumbnails = [thumbnail]

        created_epoch = try_get(video, lambda x: x['dateCreated']['epoch'])

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': video.get('description'),
            'timestamp': int_or_none(created_epoch),
            'duration': int_or_none(metadata.get('duration')),
        }
69
70
class CBSSportsBaseIE(InfoExtractor):
    """Base for page extractors that locate a player iframe and hand it to CBSSportsEmbedIE."""

    def _real_extract(self, url):
        """Download the page, find the ``/player/embed`` iframe and delegate to it."""
        page_id = self._match_id(url)
        html = self._download_webpage(url, page_id)

        # Accepts the iframe URL from either a ``src`` or a ``data-src`` attribute.
        embed_pattern = r'<iframe[^>]+(?:data-)?src="(https?://[^/]+/player/embed[^"]+)"'
        embed_url = self._search_regex(embed_pattern, html, 'embed url')

        return self.url_result(embed_url, CBSSportsEmbedIE.ie_key())
79
80
class CBSSportsIE(CBSSportsBaseIE):
    """Video pages on cbssports.com of the form ``/<section>/video/<slug>``.

    Extraction itself is inherited from CBSSportsBaseIE; this class only
    supplies the URL pattern and the test case.
    """

    IE_NAME = 'cbssports'
    _VALID_URL = r'https?://(?:www\.)?cbssports\.com/[^/]+/video/(?P<id>[^/?#&]+)'
    _TESTS = [
        {
            'url': 'https://www.cbssports.com/college-football/video/cover-3-stanford-spring-gleaning/',
            'info_dict': {
                'id': 'b56c03a6-231a-4bbe-9c55-af3c8a8e9636',
                'ext': 'mp4',
                'title': 'Cover 3: Stanford Spring Gleaning',
                'description': 'The Cover 3 crew break down everything you need to know about the Stanford Cardinal this spring.',
                'timestamp': 1617218398,
                'upload_date': '20210331',
                'duration': 502,
            },
        },
    ]
96
97
class TwentyFourSevenSportsIE(CBSSportsBaseIE):
    """Video pages on 247sports.com under ``/Video/``.

    The numeric part of the URL, optionally preceded by a title slug and a
    dash, is captured as the page id. Extraction itself is inherited from
    CBSSportsBaseIE.
    """

    IE_NAME = '247sports'
    _VALID_URL = r'https?://(?:www\.)?247sports\.com/Video/(?:[^/?#&]+-)?(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://247sports.com/Video/2021-QB-Jake-Garcia-senior-highlights-through-five-games-10084854/',
            'info_dict': {
                'id': '4f1265cb-c3b5-44a8-bb1d-1914119a0ccc',
                'ext': 'mp4',
                'title': '2021 QB Jake Garcia senior highlights through five games',
                'description': 'md5:8cb67ebed48e2e6adac1701e0ff6e45b',
                'timestamp': 1607114223,
                'upload_date': '20201204',
                'duration': 208,
            },
        },
    ]