]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/ccma.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / ccma.py
1 from .common import InfoExtractor
2 from ..utils import (
3 clean_html,
4 determine_ext,
5 int_or_none,
6 parse_duration,
7 parse_resolution,
8 try_get,
9 unified_timestamp,
10 url_or_none,
11 )
12
13
class CCMAIE(InfoExtractor):
    """Extractor for ccma.cat video and audio pages.

    The media type and numeric id are taken from the page URL and the
    metadata is downloaded as JSON from the ``media.jsp`` endpoint.
    """

    _VALID_URL = r'https?://(?:www\.)?ccma\.cat/(?:[^/]+/)*?(?P<type>video|audio)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.ccma.cat/tv3/alacarta/lespot-de-la-marato-de-tv3/lespot-de-la-marato-de-tv3/video/5630208/',
        'md5': '7296ca43977c8ea4469e719c609b0871',
        'info_dict': {
            'id': '5630208',
            'ext': 'mp4',
            'title': 'L\'espot de La Marató de TV3',
            'description': 'md5:f12987f320e2f6e988e9908e4fe97765',
            'timestamp': 1478608140,
            'upload_date': '20161108',
            'age_limit': 0,
        },
    }, {
        'url': 'http://www.ccma.cat/catradio/alacarta/programa/el-consell-de-savis-analitza-el-derbi/audio/943685/',
        'md5': 'fa3e38f269329a278271276330261425',
        'info_dict': {
            'id': '943685',
            'ext': 'mp3',
            'title': 'El Consell de Savis analitza el derbi',
            'description': 'md5:e2a3648145f3241cb9c6b4b624033e53',
            'upload_date': '20170512',
            'timestamp': 1494622500,
            'vcodec': 'none',
            'categories': ['Esports'],
        },
    }, {
        'url': 'http://www.ccma.cat/tv3/alacarta/crims/crims-josep-tallada-lespereu-me-capitol-1/video/6031387/',
        'md5': 'b43c3d3486f430f3032b5b160d80cbc3',
        'info_dict': {
            'id': '6031387',
            'ext': 'mp4',
            'title': 'Crims - Josep Talleda, l\'"Espereu-me" (capítol 1)',
            'description': 'md5:7cbdafb640da9d0d2c0f62bad1e74e60',
            'timestamp': 1582577700,
            'upload_date': '20200224',
            'subtitles': 'mincount:4',
            'age_limit': 16,
            'series': 'Crims',
        },
    }]

    def _collect_formats(self, media_url, media_id, media_type):
        """Build the format list from the ``media.url`` value of the response.

        ``media_url`` is either a list of entries (each read for its ``file``
        and ``label`` keys) or a single URL. Anything that does not yield a
        usable URL is skipped instead of producing a broken format.
        """
        if not isinstance(media_url, list):
            # Validate the lone URL the same way list entries are validated,
            # so a missing or malformed value never becomes a format whose
            # 'url' is unusable.
            single_url = url_or_none(media_url)
            if not single_url:
                return []
            return [{
                'url': single_url,
                'vcodec': 'none' if media_type == 'audio' else None,
            }]

        formats = []
        for entry in media_url:
            if not isinstance(entry, dict):
                continue
            format_url = url_or_none(entry.get('file'))
            if not format_url:
                continue
            if determine_ext(format_url) == 'mpd':
                # Non-fatal: a failing manifest must not discard the
                # formats gathered from the other entries.
                formats.extend(self._extract_mpd_formats(
                    format_url, media_id, mpd_id='dash', fatal=False))
                continue
            label = entry.get('label')
            # Start from whatever parse_resolution derives from the label,
            # then attach the URL and use the label as the format id.
            fmt = parse_resolution(label)
            fmt.update({
                'url': format_url,
                'format_id': label,
            })
            formats.append(fmt)
        return formats

    @staticmethod
    def _collect_subtitles(subtitols):
        """Group subtitle URLs by language key.

        ``subtitols`` may be a single dict or a list of dicts. The key is the
        entry's ``iso`` value, falling back to ``text`` and finally ``'ca'``.
        """
        subtitols = subtitols or []
        if isinstance(subtitols, dict):
            subtitols = [subtitols]

        subtitles = {}
        for st in subtitols:
            sub_url = st.get('url')
            if not sub_url:
                continue
            lang = st.get('iso') or st.get('text') or 'ca'
            subtitles.setdefault(lang, []).append({
                'url': sub_url,
            })
        return subtitles

    @staticmethod
    def _collect_thumbnails(imatges):
        """Return a one-element thumbnail list from ``imatges``, or ``[]``."""
        # Anything other than a dict cannot be queried with .get()
        if not isinstance(imatges, dict):
            return []
        thumbnail_url = imatges.get('url')
        if not thumbnail_url:
            return []
        return [{
            'url': thumbnail_url,
            'width': int_or_none(imatges.get('amplada')),
            'height': int_or_none(imatges.get('alcada')),
        }]

    @staticmethod
    def _parse_age_limit(codi_etic):
        """Derive an age limit from a ``<prefix>_<rating>`` code string.

        Returns 0 for the rating ``TP``, the integer value of a numeric
        rating, and ``None`` for anything else (including non-string input).
        """
        if not isinstance(codi_etic, str):
            return None
        parts = codi_etic.split('_')
        if len(parts) != 2:
            return None
        rating = parts[1]
        if rating == 'TP':
            # NOTE(review): 'TP' presumably means "all audiences" - confirm
            return 0
        return int_or_none(rating)

    def _real_extract(self, url):
        """Download the media JSON for ``url`` and return the info dict.

        Raises ``KeyError`` if the response lacks ``media.url``,
        ``informacio`` or ``informacio.titol``; every other field is optional.
        """
        # Named groups keep this independent of the group order in _VALID_URL
        media_type, media_id = self._match_valid_url(url).group('type', 'id')

        media = self._download_json(
            'http://dinamics.ccma.cat/pvideo/media.jsp', media_id, query={
                'media': media_type,
                'idint': media_id,
                'format': 'dm',
            })

        formats = self._collect_formats(
            media['media']['url'], media_id, media_type)

        informacio = media['informacio']
        title = informacio['titol']

        durada = informacio.get('durada') or {}
        # Prefer the 'milisegons' value divided by 1000; otherwise parse the
        # textual duration.
        duration = (
            int_or_none(durada.get('milisegons'), 1000)
            or parse_duration(durada.get('text')))

        tematica = try_get(informacio, lambda x: x['tematica']['text'])
        data_utc = try_get(informacio, lambda x: x['data_emissio']['utc'])
        codi_etic = try_get(informacio, lambda x: x['codi_etic']['id'])

        return {
            'id': media_id,
            'title': title,
            'description': clean_html(informacio.get('descripcio')),
            'duration': duration,
            'timestamp': unified_timestamp(data_utc),
            'thumbnails': self._collect_thumbnails(media.get('imatges')),
            'subtitles': self._collect_subtitles(media.get('subtitols')),
            'formats': formats,
            'age_limit': self._parse_age_limit(codi_etic),
            'alt_title': informacio.get('titol_complet'),
            'episode_number': int_or_none(informacio.get('capitol')),
            'categories': [tematica] if tematica else None,
            'series': informacio.get('programa'),
        }