]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/srgssr.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
17 class SRGSSRIE(InfoExtractor
):
20 https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
34 _GEO_COUNTRIES
= ['CH']
37 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
38 'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.',
39 # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.',
40 'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.',
41 'LEGAL': 'The video cannot be transmitted for legal reasons.',
42 'STARTDATE': 'This video is not yet available. Please try again later.',
44 _DEFAULT_LANGUAGE_CODES
= {
52 def _get_tokenized_src(self
, url
, video_id
, format_id
):
53 token
= self
._download
_json
(
54 'http://tp.srgssr.ch/akahd/token?acl=*',
55 video_id
, 'Downloading %s token' % format_id
, fatal
=False) or {}
56 auth_params
= try_get(token
, lambda x
: x
['token']['authparams'])
58 url
+= ('?' if '?' not in url
else '&') + auth_params
61 def _get_media_data(self
, bu
, media_type
, media_id
):
62 query
= {'onlyChapters': True}
if media_type
== 'video' else {}
63 full_media_data
= self
._download
_json
(
64 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
65 % (bu
, media_type
, media_id
),
66 media_id
, query
=query
)['chapterList']
69 x
for x
in full_media_data
if x
.get('id') == media_id
)
71 raise ExtractorError('No media information found')
73 block_reason
= media_data
.get('blockReason')
74 if block_reason
and block_reason
in self
._ERRORS
:
75 message
= self
._ERRORS
[block_reason
]
76 if block_reason
== 'GEOBLOCK':
77 self
.raise_geo_restricted(
78 msg
=message
, countries
=self
._GEO
_COUNTRIES
)
80 '%s said: %s' % (self
.IE_NAME
, message
), expected
=True)
84 def _real_extract(self
, url
):
85 bu
, media_type
, media_id
= re
.match(self
._VALID
_URL
, url
).groups()
86 media_data
= self
._get
_media
_data
(bu
, media_type
, media_id
)
87 title
= media_data
['title']
91 q
= qualities(['SD', 'HD'])
92 for source
in (media_data
.get('resourceList') or []):
93 format_url
= source
.get('url')
96 protocol
= source
.get('protocol')
97 quality
= source
.get('quality')
99 for e
in (protocol
, source
.get('encoding'), quality
):
102 format_id
= '-'.join(format_id
)
104 if protocol
in ('HDS', 'HLS'):
105 if source
.get('tokenType') == 'AKAMAI':
106 format_url
= self
._get
_tokenized
_src
(
107 format_url
, media_id
, format_id
)
108 fmts
, subs
= self
._extract
_akamai
_formats
_and
_subtitles
(
109 format_url
, media_id
)
111 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
112 elif protocol
== 'HLS':
113 m3u8_fmts
, m3u8_subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
114 format_url
, media_id
, 'mp4', 'm3u8_native',
115 m3u8_id
=format_id
, fatal
=False)
116 formats
.extend(m3u8_fmts
)
117 subtitles
= self
._merge
_subtitles
(subtitles
, m3u8_subs
)
118 elif protocol
in ('HTTP', 'HTTPS'):
120 'format_id': format_id
,
122 'quality': q(quality
),
125 # This is needed because for audio medias the podcast url is usually
126 # always included, even if is only an audio segment and not the
128 if int_or_none(media_data
.get('position')) == 0:
130 podcast_url
= media_data
.get('podcast%sdUrl' % p
)
135 'format_id': 'PODCAST-' + quality
,
137 'quality': q(quality
),
139 self
._sort
_formats
(formats
)
141 if media_type
== 'video':
142 for sub
in (media_data
.get('subtitleList') or []):
143 sub_url
= sub
.get('url')
146 lang
= sub
.get('locale') or self
._DEFAULT
_LANGUAGE
_CODES
[bu
]
147 subtitles
.setdefault(lang
, []).append({
154 'description': media_data
.get('description'),
155 'timestamp': parse_iso8601(media_data
.get('date')),
156 'thumbnail': media_data
.get('imageUrl'),
157 'duration': float_or_none(media_data
.get('duration'), 1000),
158 'subtitles': subtitles
,
163 class SRGSSRPlayIE(InfoExtractor
):
164 IE_DESC
= 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
165 _VALID_URL
= r
'''(?x)
168 (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
170 [^/]+/(?P<type>video|audio)/[^?]+|
171 popup(?P<type_2>video|audio)player
173 \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P<id>[0-9a-f\-]{36}|\d+)
177 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
178 'md5': '6db2226ba97f62ad42ce09783680046c',
180 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
182 'upload_date': '20130701',
183 'title': 'Snowden beantragt Asyl in Russland',
184 'timestamp': 1372708215,
186 'thumbnail': r
're:^https?://.*1383719781\.png$',
188 'expected_warnings': ['Unable to download f4m manifest'],
190 'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
192 'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc',
194 'upload_date': '20151013',
195 'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
196 'timestamp': 1444709160,
201 'skip_download': True,
204 'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
205 'md5': '67a2a9ae4e8e62a68d0e9820cc9782df',
208 'display_id': '6348260',
212 'upload_date': '20141201',
213 'timestamp': 1417458600,
214 'thumbnail': r
're:^https?://.*\.image',
218 'skip_download': True,
221 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270',
225 'title': 'Why people were against tax reforms',
226 'description': 'md5:7ac442c558e9630e947427469c4b824d',
228 'upload_date': '20170215',
229 'timestamp': 1487173560,
230 'thumbnail': r
're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
231 'subtitles': 'count:9',
234 'skip_download': True,
237 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
238 'only_matching': True,
240 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
241 'only_matching': True,
243 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
244 'only_matching': True,
246 # audio segment, has podcastSdUrl of the full episode
247 'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb',
248 'only_matching': True,
def _real_extract(self, url):
    """Translate a play-site URL into an internal ``srgssr:`` URL.

    The business unit, media type and media id are read from the named
    groups of ``_VALID_URL``. They are combined into a string of the form
    ``srgssr:<bu>:<type>:<id>``, which is passed to ``url_result``
    together with the extractor key ``'SRGSSR'``.
    """
    match = re.match(self._VALID_URL, url)

    # Only the first three characters of the business unit are kept
    # (so 'swissinfo' becomes 'swi').
    unit = match.group('bu')[:3]

    # Regular play URLs capture the media type in the ``type`` group; the
    # popup player URLs capture it in ``type_2``. Use whichever matched.
    kind = match.group('type')
    if not kind:
        kind = match.group('type_2')

    internal_url = 'srgssr:%s:%s:%s' % (unit, kind, match.group('id'))
    return self.url_result(internal_url, 'SRGSSR')