2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
class ArteTVBaseIE(InfoExtractor):
    """Constants shared by the arte.tv extractors defined in this module."""

    # Alternation of the language codes accepted in arte.tv URLs; the
    # subclasses interpolate it into their _VALID_URL patterns.
    _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
    # Root of the player API that the subclasses query for JSON metadata.
    _API_BASE = 'https://api.arte.tv/api/player/v1'
class ArteTVIE(ArteTVBaseIE):
    """Extract a single arte.tv programme.

    Matches both the public ``/<lang>/videos/<id>`` pages and direct
    player-API ``config/<lang>/<id>`` URLs, downloads the player JSON
    config and converts its ``VSR`` stream table into formats ranked by how
    well each stream's language version fits the requested language.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
                        )
                        /(?P<id>\d{6}-\d{3}-[AF])
                    ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
    # NOTE(review): the 'id'/'ext' entries of the first info_dict were not
    # visible in the reviewed excerpt -- confirm against the repository.
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
        'info_dict': {
            'id': '088501-000-A',
            'ext': 'mp4',
            'title': 'Mexico: Stealing Petrol to Survive',
            'upload_date': '20190628',
        },
    }, {
        'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
        'only_matching': True,
    }, {
        'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the programme addressed by ``url``.

        Returns a dict with ``id``, ``title``, ``description``,
        ``upload_date``, ``thumbnail`` and ``formats``.

        Raises ExtractorError (expected) when the player config carries no
        ``VSR`` stream table.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        # Only one of the two named groups is set, depending on which URL
        # flavour matched.
        lang = mobj.group('lang') or mobj.group('lang_2')

        info = self._download_json(
            '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
        player_info = info['videoJsonPlayer']

        vsr = try_get(player_info, lambda x: x['VSR'], dict)
        if not vsr:
            error = None
            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
                error = try_get(
                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
            if not error:
                # Parenthesised on purpose: '%' binds tighter than 'or', so
                # without the parentheses the video_id fallback is dead code
                # and a missing VID renders as "Video None ...".
                error = 'Video %s is not available' % (
                    player_info.get('VID') or video_id)
            raise ExtractorError(error, expected=True)

        upload_date_str = player_info.get('shootingDate')
        if not upload_date_str:
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

        title = (player_info.get('VTI') or player_info['VID']).strip()
        # 'VSU' may be present but null, so coerce before stripping.
        subtitle = (player_info.get('VSU') or '').strip()
        if subtitle:
            title += ' - %s' % subtitle

        qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])

        # Maps the URL language to the code used inside 'versionCode'.
        # NOTE(review): this table was not visible in the reviewed excerpt
        # and is restored from the upstream extractor -- confirm it.
        LANGS = {
            'fr': 'F',
            'de': 'A',
            'en': 'E[ANG]',
            'es': 'E[ESP]',
            'it': 'E[ITA]',
            'pl': 'E[POL]',
        }

        langcode = LANGS.get(lang, lang)
        lang_re = re.escape(langcode)

        # Language preference from most to least priority
        # Reference: section 6.8 of
        # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
        # The table only depends on the requested language, so it is built
        # once here rather than once per stream.
        PREFERENCES = (
            # original version in requested language, without subtitles
            r'VO{0}$'.format(lang_re),
            # original version in requested language, with partial subtitles in requested language
            r'VO{0}-ST{0}$'.format(lang_re),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO{0}-STM{0}$'.format(lang_re),
            # non-original (dubbed) version in requested language, without subtitles
            r'V{0}$'.format(lang_re),
            # non-original (dubbed) version in requested language, with partial subtitles in requested language
            r'V{0}-ST{0}$'.format(lang_re),
            # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'V{0}-STM{0}$'.format(lang_re),
            # original version in requested language, with partial subtitles in different language
            r'VO{0}-ST(?!{0}).+?$'.format(lang_re),
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO{0}-STM(?!{0}).+?$'.format(lang_re),
            # original version in different language, with partial subtitles in requested language
            r'VO(?:(?!{0}).+?)?-ST{0}$'.format(lang_re),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO(?:(?!{0}).+?)?-STM{0}$'.format(lang_re),
            # original version in different language, without subtitles
            r'VO(?:(?!{0}))?$'.format(lang_re),
            # original version in different language, with partial subtitles in different language
            r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(lang_re),
            # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(lang_re),
        )

        formats = []
        for format_id, format_dict in vsr.items():
            f = dict(format_dict)
            format_url = url_or_none(f.get('url'))
            streamer = f.get('streamer')
            if not format_url and not streamer:
                continue

            # A stream without 'versionCode' must not crash re.match(); it
            # simply matches no preference and gets the lowest rank.
            version_code = f.get('versionCode') or ''
            for pref, pattern in enumerate(PREFERENCES):
                if re.match(pattern, version_code):
                    lang_pref = len(PREFERENCES) - pref
                    break
            else:
                lang_pref = -1

            media_type = f.get('mediaType')
            if media_type == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False)
                for m3u8_format in m3u8_formats:
                    m3u8_format['language_preference'] = lang_pref
                formats.extend(m3u8_formats)
                continue

            fmt = {
                'format_id': format_id,
                'language_preference': lang_pref,
                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'tbr': int_or_none(f.get('bitrate')),
                'quality': qfunc(f.get('quality')),
            }

            if media_type == 'rtmp':
                fmt['url'] = f['streamer']
                fmt['play_path'] = 'mp4:' + f['url']
                fmt['ext'] = 'flv'
            else:
                fmt['url'] = f['url']

            formats.append(fmt)

        # For this extractor, quality only represents the relative quality
        # with respect to other formats with the same resolution
        self._sort_formats(formats, ('res', 'quality'))

        return {
            'id': player_info.get('VID') or video_id,
            'title': title,
            'description': player_info.get('VDE') or player_info.get('V7T'),
            'upload_date': unified_strdate(upload_date_str),
            # 'VTU' may be present but null, hence the 'or {}'.
            'thumbnail': player_info.get('programImage') or (player_info.get('VTU') or {}).get('IUR'),
            'formats': formats,
        }
class ArteTVEmbedIE(InfoExtractor):
    """Resolve arte.tv embedded-player URLs to the config URL they wrap.

    The player URL carries the real player-config URL in its ``json_url``
    query parameter; extraction is delegated to ArteTVIE for that URL.
    """

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
    # NOTE(review): the 'ext' entry of the first info_dict was not visible
    # in the reviewed excerpt -- confirm against the repository.
    _TESTS = [{
        'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
        'info_dict': {
            'id': '100605-013-A',
            'ext': 'mp4',
            'title': 'United we Stream November Lockdown Edition #13',
            'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
            'upload_date': '20201116',
        },
    }, {
        'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every arte.tv player URL used as an iframe/script ``src``.

        The pattern's first group captures the opening quote so the same
        quote can close the attribute; only the URL group is returned.
        """
        return [url for _, url in re.findall(
            r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
            webpage)]

    def _real_extract(self, url):
        """Hand the URL found in ``json_url`` over to ArteTVIE."""
        # Imported locally (with the Python 2 fallback) so this method does
        # not depend on any module-level query-string helper.
        try:
            from urllib.parse import parse_qs, urlparse
        except ImportError:  # Python 2
            from urlparse import parse_qs, urlparse

        qs = parse_qs(urlparse(url).query)
        json_url = qs['json_url'][0]
        video_id = ArteTVIE._match_id(json_url)
        return self.url_result(
            json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
class ArteTVPlaylistIE(ArteTVBaseIE):
    """Extract an arte.tv collection (``RC-``-prefixed id) as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
    # NOTE(review): the 'id' entry of the first info_dict was not visible in
    # the reviewed excerpt; it is taken from the test URL -- confirm.
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
        'info_dict': {
            'id': 'RC-016954',
            'title': 'Earn a Living',
            'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result with one url_transparent entry per video.

        Items of the collection's ``videos`` list that are not dicts, or
        that carry neither a usable ``url`` nor ``jsonUrl``, are skipped.
        """
        lang, playlist_id = self._match_valid_url(url).groups()
        collection = self._download_json(
            '%s/collectionData/%s/%s?source=videos'
            % (self._API_BASE, lang, playlist_id), playlist_id)

        entries = []
        for video in collection['videos']:
            if not isinstance(video, dict):
                continue
            video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
            if not video_url:
                continue
            video_id = video.get('programId')
            # url_transparent: the collection metadata below is combined
            # with whatever ArteTVIE extracts for the entry URL.
            entries.append({
                '_type': 'url_transparent',
                'url': video_url,
                'id': video_id,
                'title': video.get('title'),
                'alt_title': video.get('subtitle'),
                'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
                'duration': int_or_none(video.get('durationSeconds')),
                'view_count': int_or_none(video.get('views')),
                'ie_key': ArteTVIE.ie_key(),
            })

        title = collection.get('title')
        description = collection.get('shortDescription') or collection.get('teaserText')
        return self.playlist_result(entries, playlist_id, title, description)
class ArteTVCategoryIE(ArteTVBaseIE):
    """Extract an arte.tv category page as a playlist of its linked items.

    The URL pattern is broad, so ``suitable`` defers to ArteTVIE and
    ArteTVPlaylistIE whenever either of them accepts the URL.
    """

    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    },
    ]

    @classmethod
    def suitable(cls, url):
        """Accept ``url`` only if the more specific arte.tv extractors do not."""
        return (
            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
        """Collect the video/collection links on the page into a playlist.

        Only anchors pointing at ``/<lang>/videos/...`` URLs that ArteTVIE or
        ArteTVPlaylistIE can handle are kept; a link back to the page itself
        is ignored.
        """
        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        items = []
        for mobj in re.finditer(
                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
            video = mobj.group('url')
            if video == url:
                continue
            if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video)

        # The <title> fallback has to be given the page text and a field
        # name; called with the pattern alone it raises TypeError as soon as
        # og:title is missing.
        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(
                     r'<title\b[^>]*>([^<]+)</title>', webpage, 'title', default=None))
        # Keep the part before the last '|'; tolerate no title at all and
        # fall back to the generic title derived from the URL.
        title = strip_or_none((title or '').rsplit('|', 1)[0]) or self._generic_title(url)

        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
                                          description=self._og_search_description(webpage, default=None))