3 from .common
import InfoExtractor
class ArteTVBaseIE(InfoExtractor):
    """Constants shared by the Arte TV extractors defined in this module."""

    # Alternation of the language codes accepted in arte.tv URLs; it is
    # interpolated into the subclasses' _VALID_URL patterns.
    _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'

    # Prefix of every player API request issued by the subclasses.
    _API_BASE = 'https://api.arte.tv/api/player/v1'
class ArteTVIE(ArteTVBaseIE):
    """Extractor for a single Arte programme (id shaped like ``123456-789-A``).

    Accepts both the public ``arte.tv/<lang>/videos/<id>`` pages and the
    ``api.arte.tv/api/player/vN/config/<lang>/<id>`` player-config URLs.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
                        )
                        /(?P<id>\d{6}-\d{3}-[AF])
                    ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
        'info_dict': {
            'title': 'Mexico: Stealing Petrol to Survive',
            'upload_date': '20190628',
        },
    }, {
        'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
        'only_matching': True,
    }, {
        'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the player config for *url* and build the info dict.

        Raises ExtractorError (expected) when the config carries no ``VSR``
        stream map, using the API-supplied message when there is one.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        # The language lives in a different named group for each URL form.
        lang = mobj.group('lang') or mobj.group('lang_2')

        info = self._download_json(
            '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
        player_info = info['videoJsonPlayer']

        vsr = try_get(player_info, lambda x: x['VSR'], dict)
        if not vsr:
            error = None
            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
                error = try_get(
                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
            if not error:
                # Parenthesised on purpose: `%` binds tighter than `or`, so
                # without the parentheses the fallback id is never used.
                error = 'Video %s is not available' % (player_info.get('VID') or video_id)
            raise ExtractorError(error, expected=True)

        upload_date_str = player_info.get('shootingDate')
        if not upload_date_str:
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

        title = (player_info.get('VTI') or player_info['VID']).strip()
        # `or ''` also covers a key that is present with a null value.
        subtitle = (player_info.get('VSU') or '').strip()
        if subtitle:
            title += ' - %s' % subtitle

        qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])

        # NOTE(review): LANGS is referenced here but its definition is not
        # visible in the reviewed excerpt - presumably a mapping from the URL
        # language code to the language tag used in versionCode; confirm it
        # is in scope.
        langcode = LANGS.get(lang, lang)
        escaped_lang = re.escape(langcode)

        # The patterns depend only on the requested language, so they are
        # built once rather than once per format.
        PREFERENCES = (
            # Language preference from most to least priority
            # Reference: section 6.8 of
            # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf

            # original version in requested language, without subtitles
            r'VO{0}$'.format(escaped_lang),
            # original version in requested language, with partial subtitles in requested language
            r'VO{0}-ST{0}$'.format(escaped_lang),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO{0}-STM{0}$'.format(escaped_lang),
            # non-original (dubbed) version in requested language, without subtitles
            r'V{0}$'.format(escaped_lang),
            # non-original (dubbed) version in requested language, with partial subtitles in requested language
            r'V{0}-ST{0}$'.format(escaped_lang),
            # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'V{0}-STM{0}$'.format(escaped_lang),
            # original version in requested language, with partial subtitles in different language
            r'VO{0}-ST(?!{0}).+?$'.format(escaped_lang),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO{0}-STM(?!{0}).+?$'.format(escaped_lang),
            # original version in different language, with partial subtitles in requested language
            r'VO(?:(?!{0}).+?)?-ST{0}$'.format(escaped_lang),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO(?:(?!{0}).+?)?-STM{0}$'.format(escaped_lang),
            # original version in different language, without subtitles
            r'VO(?:(?!{0}))?$'.format(escaped_lang),
            # original version in different language, with partial subtitles in different language
            r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(escaped_lang),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(escaped_lang),
        )

        formats = []
        for format_id, format_dict in vsr.items():
            f = dict(format_dict)
            format_url = url_or_none(f.get('url'))
            streamer = f.get('streamer')
            if not format_url and not streamer:
                continue

            # A missing versionCode must not crash re.match(); with an empty
            # string no pattern matches and the lowest preference is used.
            version_code = f.get('versionCode') or ''
            for pref, pattern in enumerate(PREFERENCES):
                if re.match(pattern, version_code):
                    # Earlier patterns get the larger preference value.
                    lang_pref = len(PREFERENCES) - pref
                    break
            else:
                # No pattern matched: rank below every listed combination.
                lang_pref = -1

            format_note = '%s, %s' % (f.get('versionCode'), f.get('versionLibelle'))

            media_type = f.get('mediaType')
            if media_type == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False)
                for m3u8_format in m3u8_formats:
                    m3u8_format.update({
                        'language_preference': lang_pref,
                        'format_note': format_note,
                    })
                formats.extend(m3u8_formats)
                continue

            fmt = {
                'format_id': format_id,
                'language_preference': lang_pref,
                'format_note': format_note,
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'tbr': int_or_none(f.get('bitrate')),
                'quality': qfunc(f.get('quality')),
            }

            if media_type == 'rtmp':
                # For RTMP the streamer is the connection URL and the entry's
                # own url is the play path.
                fmt['url'] = f['streamer']
                fmt['play_path'] = 'mp4:' + f['url']
                fmt['ext'] = 'flv'
            else:
                fmt['url'] = f['url']

            formats.append(fmt)

        # For this extractor, quality only represents the relative quality
        # with respect to other formats with the same resolution
        self._sort_formats(formats, ('res', 'quality'))

        return {
            'id': player_info.get('VID') or video_id,
            'title': title,
            'description': player_info.get('VDE') or player_info.get('V7T'),
            'upload_date': unified_strdate(upload_date_str),
            'thumbnail': player_info.get('programImage') or (player_info.get('VTU') or {}).get('IUR'),
            'formats': formats,
        }
class ArteTVEmbedIE(InfoExtractor):
    """Extractor for the embeddable Arte player page.

    The player URL carries the real config URL in its ``json_url`` query
    parameter; extraction is delegated to ArteTVIE for that URL.
    """

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
    _TESTS = [{
        'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
        'info_dict': {
            'id': '100605-013-A',
            'title': 'United we Stream November Lockdown Edition #13',
            'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
            'upload_date': '20201116',
        },
    }, {
        'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the Arte player URLs found in iframe/script ``src`` attributes of *webpage*."""
        # Group 1 is the opening quote (back-referenced by \1); only the URL
        # group is kept.
        return [url for _, url in re.findall(
            r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
            webpage)]

    def _real_extract(self, url):
        """Resolve the embedded ``json_url`` and hand it over to ArteTVIE."""
        # Imported locally so this block does not depend on a module-level
        # import that may not exist.
        from urllib.parse import parse_qs, urlparse

        # parse_qs percent-decodes values and returns a list per key.
        qs = parse_qs(urlparse(url).query)
        json_url = qs['json_url'][0]
        video_id = ArteTVIE._match_id(json_url)
        return self.url_result(
            json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
class ArteTVPlaylistIE(ArteTVBaseIE):
    """Extractor for Arte collections (ids shaped like ``RC-123456``)."""

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
        'info_dict': {
            'title': 'Earn a Living',
            'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch the collection data and return a playlist of ArteTVIE entries."""
        lang, playlist_id = self._match_valid_url(url).groups()
        collection = self._download_json(
            '%s/collectionData/%s/%s?source=videos'
            % (self._API_BASE, lang, playlist_id), playlist_id)

        entries = []
        for video in collection['videos']:
            # Skip malformed items instead of failing the whole playlist.
            if not isinstance(video, dict):
                continue
            video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
            if not video_url:
                continue
            video_id = video.get('programId')
            # url_transparent: ArteTVIE resolves the formats, while the
            # metadata below comes from the collection data.
            entries.append({
                '_type': 'url_transparent',
                'url': video_url,
                'id': video_id,
                'title': video.get('title'),
                'alt_title': video.get('subtitle'),
                'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
                'duration': int_or_none(video.get('durationSeconds')),
                'view_count': int_or_none(video.get('views')),
                'ie_key': ArteTVIE.ie_key(),
            })

        title = collection.get('title')
        description = collection.get('shortDescription') or collection.get('teaserText')
        return self.playlist_result(entries, playlist_id, title, description)
class ArteTVCategoryIE(ArteTVBaseIE):
    """Extractor for Arte category pages, returned as a playlist of linked videos/collections."""

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only URLs that neither ArteTVIE nor ArteTVPlaylistIE claims."""
        # The category pattern is broad enough to also match video and
        # collection URLs, so the more specific extractors take precedence.
        return (
            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
        """Scrape the category page for video/collection links and return them as a playlist."""
        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        items = []
        for link in re.finditer(
                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
            video = link.group('url')
            # Keep only links one of the concrete extractors can handle; this
            # also drops links to other category pages.
            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video)

        # _html_search_regex needs the text to search and a field name; they
        # were missing, which raised TypeError whenever og:title was absent.
        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(
                     r'<title\b[^>]*>([^<]+)</title>', webpage, 'title', default=None))
        # `title or ''` keeps rsplit() safe when no title was found at all;
        # the generic title is then used instead.
        title = strip_or_none((title or '').rsplit('|', 1)[0]) or self._generic_title(url)

        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
                                          description=self._og_search_description(webpage, default=None))