]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/srgssr.py
1 from .common
import InfoExtractor
13 class SRGSSRIE(InfoExtractor
):
16 https?://tp\.srgssr\.ch/p(?:/[^/]+)+\?urn=urn|
30 _GEO_COUNTRIES
= ['CH']
33 'AGERATING12': 'To protect children under the age of 12, this video is only available between 8 p.m. and 6 a.m.',
34 'AGERATING18': 'To protect children under the age of 18, this video is only available between 11 p.m. and 5 a.m.',
35 # 'ENDDATE': 'For legal reasons, this video was only available for a specified period of time.',
36 'GEOBLOCK': 'For legal reasons, this video is only available in Switzerland.',
37 'LEGAL': 'The video cannot be transmitted for legal reasons.',
38 'STARTDATE': 'This video is not yet available. Please try again later.',
40 _DEFAULT_LANGUAGE_CODES
= {
def _get_tokenized_src(self, url, video_id, format_id):
    """Return ``url`` with Akamai auth parameters appended when available.

    Requests a token from the SRG SSR token endpoint; the request is
    non-fatal, so a failed download is treated as an empty response. When
    the response carries ``token.authparams``, that value is appended to
    ``url`` as (part of) its query string.

    ``video_id`` is handed to the download helper and ``format_id`` is only
    interpolated into the download note.

    NOTE(review): this method was reassembled from a token-per-line web
    extraction in which two source lines were missing. The ``if auth_params``
    guard and the final ``return url`` were restored because the lookup can
    yield ``None`` (the download is non-fatal) and the caller assigns this
    method's result to ``format_url`` -- confirm against upstream.
    """
    token = self._download_json(
        'http://tp.srgssr.ch/akahd/token?acl=*',
        video_id, 'Downloading %s token' % format_id, fatal=False) or {}
    auth_params = try_get(token, lambda x: x['token']['authparams'])
    if auth_params:
        # Extend an existing query string instead of starting a second one.
        url += ('?' if '?' not in url else '&') + auth_params
    return url
57 def _get_media_data(self
, bu
, media_type
, media_id
):
58 query
= {'onlyChapters': True}
if media_type
== 'video' else {}
59 full_media_data
= self
._download
_json
(
60 'https://il.srgssr.ch/integrationlayer/2.0/%s/mediaComposition/%s/%s.json'
61 % (bu
, media_type
, media_id
),
62 media_id
, query
=query
)['chapterList']
65 x
for x
in full_media_data
if x
.get('id') == media_id
)
67 raise ExtractorError('No media information found')
69 block_reason
= media_data
.get('blockReason')
70 if block_reason
and block_reason
in self
._ERRORS
:
71 message
= self
._ERRORS
[block_reason
]
72 if block_reason
== 'GEOBLOCK':
73 self
.raise_geo_restricted(
74 msg
=message
, countries
=self
._GEO
_COUNTRIES
)
76 '%s said: %s' % (self
.IE_NAME
, message
), expected
=True)
80 def _real_extract(self
, url
):
81 bu
, media_type
, media_id
= self
._match
_valid
_url
(url
).groups()
82 media_data
= self
._get
_media
_data
(bu
, media_type
, media_id
)
83 title
= media_data
['title']
87 q
= qualities(['SD', 'HD'])
88 for source
in (media_data
.get('resourceList') or []):
89 format_url
= source
.get('url')
92 protocol
= source
.get('protocol')
93 quality
= source
.get('quality')
94 format_id
= join_nonempty(protocol
, source
.get('encoding'), quality
)
96 if protocol
in ('HDS', 'HLS'):
97 if source
.get('tokenType') == 'AKAMAI':
98 format_url
= self
._get
_tokenized
_src
(
99 format_url
, media_id
, format_id
)
100 fmts
, subs
= self
._extract
_akamai
_formats
_and
_subtitles
(
101 format_url
, media_id
)
103 subtitles
= self
._merge
_subtitles
(subtitles
, subs
)
104 elif protocol
== 'HLS':
105 m3u8_fmts
, m3u8_subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
106 format_url
, media_id
, 'mp4', 'm3u8_native',
107 m3u8_id
=format_id
, fatal
=False)
108 formats
.extend(m3u8_fmts
)
109 subtitles
= self
._merge
_subtitles
(subtitles
, m3u8_subs
)
110 elif protocol
in ('HTTP', 'HTTPS'):
112 'format_id': format_id
,
114 'quality': q(quality
),
117 # This is needed because for audio medias the podcast url is usually
118 # always included, even if is only an audio segment and not the
120 if int_or_none(media_data
.get('position')) == 0:
122 podcast_url
= media_data
.get('podcast%sdUrl' % p
)
127 'format_id': 'PODCAST-' + quality
,
129 'quality': q(quality
),
132 if media_type
== 'video':
133 for sub
in (media_data
.get('subtitleList') or []):
134 sub_url
= sub
.get('url')
137 lang
= sub
.get('locale') or self
._DEFAULT
_LANGUAGE
_CODES
[bu
]
138 subtitles
.setdefault(lang
, []).append({
145 'description': media_data
.get('description'),
146 'timestamp': parse_iso8601(media_data
.get('date')),
147 'thumbnail': media_data
.get('imageUrl'),
148 'duration': float_or_none(media_data
.get('duration'), 1000),
149 'subtitles': subtitles
,
154 class SRGSSRPlayIE(InfoExtractor
):
155 IE_DESC
= 'srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites'
156 _VALID_URL
= r
'''(?x)
159 (?P<bu>srf|rts|rsi|rtr|swissinfo)\.ch/play/(?:tv|radio)/
161 [^/]+/(?P<type>video|audio)/[^?]+|
162 popup(?P<type_2>video|audio)player
164 \?.*?\b(?:id=|urn=urn:[^:]+:video:)(?P<id>[0-9a-f\-]{36}|\d+)
168 'url': 'http://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?id=28e1a57d-5b76-4399-8ab3-9097f071e6c5',
169 'md5': '6db2226ba97f62ad42ce09783680046c',
171 'id': '28e1a57d-5b76-4399-8ab3-9097f071e6c5',
173 'upload_date': '20130701',
174 'title': 'Snowden beantragt Asyl in Russland',
175 'timestamp': 1372708215,
177 'thumbnail': r
're:^https?://.*1383719781\.png$',
179 'expected_warnings': ['Unable to download f4m manifest'],
181 'url': 'http://www.rtr.ch/play/radio/actualitad/audio/saira-tujetsch-tuttina-cuntinuar-cun-sedrun-muster-turissem?id=63cb0778-27f8-49af-9284-8c7a8c6d15fc',
183 'id': '63cb0778-27f8-49af-9284-8c7a8c6d15fc',
185 'upload_date': '20151013',
186 'title': 'Saira: Tujetsch - tuttina cuntinuar cun Sedrun Mustér Turissem',
187 'timestamp': 1444709160,
192 'skip_download': True,
195 'url': 'http://www.rts.ch/play/tv/-/video/le-19h30?id=6348260',
196 'md5': '67a2a9ae4e8e62a68d0e9820cc9782df',
199 'display_id': '6348260',
203 'upload_date': '20141201',
204 'timestamp': 1417458600,
205 'thumbnail': r
're:^https?://.*\.image',
209 'skip_download': True,
212 'url': 'http://play.swissinfo.ch/play/tv/business/video/why-people-were-against-tax-reforms?id=42960270',
216 'title': 'Why people were against tax reforms',
217 'description': 'md5:7ac442c558e9630e947427469c4b824d',
219 'upload_date': '20170215',
220 'timestamp': 1487173560,
221 'thumbnail': r
're:https?://www\.swissinfo\.ch/srgscalableimage/42961964',
222 'subtitles': 'count:9',
225 'skip_download': True,
228 'url': 'https://www.srf.ch/play/tv/popupvideoplayer?id=c4dba0ca-e75b-43b2-a34f-f708a4932e01',
229 'only_matching': True,
231 'url': 'https://www.srf.ch/play/tv/10vor10/video/snowden-beantragt-asyl-in-russland?urn=urn:srf:video:28e1a57d-5b76-4399-8ab3-9097f071e6c5',
232 'only_matching': True,
234 'url': 'https://www.rts.ch/play/tv/19h30/video/le-19h30?urn=urn:rts:video:6348260',
235 'only_matching': True,
237 # audio segment, has podcastSdUrl of the full episode
238 'url': 'https://www.srf.ch/play/radio/popupaudioplayer?id=50b20dc8-f05b-4972-bf03-e438ff2833eb',
239 'only_matching': True,
242 def _real_extract(self
, url
):
243 mobj
= self
._match
_valid
_url
(url
)
244 bu
= mobj
.group('bu')
245 media_type
= mobj
.group('type') or mobj
.group('type_2')
246 media_id
= mobj
.group('id')
247 return self
.url_result('srgssr:%s:%s:%s' % (bu
[:3], media_type
, media_id
), 'SRGSSR')