]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/ninecninemedia.py
[cleanup] Use `_html_extract_title`
[yt-dlp.git] / yt_dlp / extractor / ninecninemedia.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 float_or_none,
7 int_or_none,
8 parse_iso8601,
9 try_get,
10 )
11
12
class NineCNineMediaIE(InfoExtractor):
    """Extractor for internal ``9c9media:<destination_code>:<id>`` URLs.

    Metadata comes from the 9c9media content API (``_API_BASE_TEMPLATE``);
    formats come from the HLS, HDS and DASH manifests exposed under the
    first content package of the requested content item.
    """
    IE_NAME = '9c9media'
    _GEO_COUNTRIES = ['CA']
    _VALID_URL = r'9c9media:(?P<destination_code>[^:]+):(?P<id>\d+)'
    _API_BASE_TEMPLATE = 'http://capi.9c9media.com/destinations/%s/platforms/desktop/contents/%s/'

    def _real_extract(self, url):
        """Build the info dict for the content item addressed by *url*.

        Two API requests are made: one for the content item itself and one
        for its first content package. ``Name`` and ``ContentPackages[0]``
        are treated as mandatory (plain indexing); every other field is
        read defensively and may end up as ``None`` or an empty list.
        """
        destination_code, content_id = self._match_valid_url(url).groups()
        api_base_url = self._API_BASE_TEMPLATE % (destination_code, content_id)
        content = self._download_json(api_base_url, content_id, query={
            '$include': '[Media.Name,Season,ContentPackages.Duration,ContentPackages.Id]',
        })
        title = content['Name']
        content_package = content['ContentPackages'][0]
        package_id = content_package['Id']
        content_package_url = api_base_url + 'contentpackages/%s/' % package_id
        # Re-fetch the package on its own endpoint; from here on
        # ``content_package`` refers to this second response.
        content_package = self._download_json(
            content_package_url, content_id, query={
                '$include': '[HasClosedCaptions]',
            })

        # A truthy Constraints.Security.Type is reported as DRM unless the
        # user explicitly allowed unplayable formats.
        if (not self.get_param('allow_unplayable_formats')
                and try_get(content_package, lambda x: x['Constraints']['Security']['Type'])):
            self.report_drm(content_id)

        manifest_base_url = content_package_url + 'manifest.'
        # Each manifest type is requested with fatal=False and its formats
        # are accumulated into one list.
        formats = []
        formats.extend(self._extract_m3u8_formats(
            manifest_base_url + 'm3u8', content_id, 'mp4',
            'm3u8_native', m3u8_id='hls', fatal=False))
        formats.extend(self._extract_f4m_formats(
            manifest_base_url + 'f4m', content_id,
            f4m_id='hds', fatal=False))
        formats.extend(self._extract_mpd_formats(
            manifest_base_url + 'mpd', content_id,
            mpd_id='dash', fatal=False))
        self._sort_formats(formats)

        thumbnails = []
        for image in (content.get('Images') or []):
            image_url = image.get('Url')
            if not image_url:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int_or_none(image.get('Width')),
                'height': int_or_none(image.get('Height')),
            })

        tags, categories = [], []
        for source_name, container in (('Tags', tags), ('Genres', categories)):
            # ``or []`` (rather than a ``.get`` default) also covers a key
            # that is present but null, matching the 'Images' handling above.
            for e in (content.get(source_name) or []):
                e_name = e.get('Name')
                if not e_name:
                    continue
                container.append(e_name)

        season = content.get('Season') or {}

        info = {
            'id': content_id,
            'title': title,
            'description': content.get('Desc') or content.get('ShortDesc'),
            'timestamp': parse_iso8601(content.get('BroadcastDateTime')),
            'episode_number': int_or_none(content.get('Episode')),
            'season': season.get('Name'),
            'season_number': int_or_none(season.get('Number')),
            'season_id': season.get('Id'),
            'series': try_get(content, lambda x: x['Media']['Name']),
            'tags': tags,
            'categories': categories,
            'duration': float_or_none(content_package.get('Duration')),
            'formats': formats,
            'thumbnails': thumbnails,
        }

        # Subtitle URLs are derived from the manifest base URL and are only
        # advertised when the package reports closed captions.
        if content_package.get('HasClosedCaptions'):
            info['subtitles'] = {
                'en': [{
                    'url': manifest_base_url + 'vtt',
                    'ext': 'vtt',
                }, {
                    'url': manifest_base_url + 'srt',
                    'ext': 'srt',
                }]
            }

        return info
101
102
class CPTwentyFourIE(InfoExtractor):
    """Extractor for cp24.com news pages.

    The page itself is only scraped for the video id and destination code;
    the actual extraction is delegated to ``NineCNineMediaIE`` through an
    internal ``9c9media:`` URL.
    """
    IE_NAME = 'cp24'
    _GEO_COUNTRIES = ['CA']
    _VALID_URL = r'https?://(?:www\.)?cp24\.com/news/(?P<id>[^?#]+)'

    _TESTS = [{
        'url': 'https://www.cp24.com/news/video-shows-atm-being-ripped-out-of-business-by-pickup-truck-driver-in-mississauga-1.5676877',
        'info_dict': {
            'id': '2328005',
            'ext': 'mp4',
            'title': 'WATCH: Truck rips ATM from Mississauga business',
            'description': 'md5:cf7498480885f080a754389a2b2f7073',
            'timestamp': 1637618377,
            'episode_number': None,
            'season': 'Season 0',
            'season_number': 0,
            'season_id': 57974,
            'series': 'CTV News Toronto',
            'duration': 26.86,
            'thumbnail': 'http://images2.9c9media.com/image_asset/2014_11_5_2eb609a0-475b-0132-fbd6-34b52f6f1279_jpg_2000x1125.jpg',
            'upload_date': '20211122',
        },
        'params': {'skip_download': True, 'format': 'bv'}
    }]

    def _real_extract(self, url):
        """Return a url_result pointing at the matching 9c9media item."""
        page_slug = self._match_id(url)
        page_html = self._download_webpage(url, page_slug)

        # Both values are taken from the getAuthStates("<id>", "<destination>")
        # call embedded in the page.
        video_id, destination_code = self._search_regex(
            r'getAuthStates\("(?P<id>[^"]+)",\s?"(?P<destination>[^"]+)"\);',
            page_html, 'video id and destination', group=('id', 'destination'))

        internal_url = f'9c9media:{destination_code}:{video_id}'
        return self.url_result(
            internal_url, ie=NineCNineMediaIE.ie_key(), video_id=video_id)