2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
class ArteTVBaseIE(InfoExtractor):
    # Regex alternation of the language codes accepted in arte.tv URLs;
    # it is interpolated into the _VALID_URL patterns of the extractors
    # in this module.
    _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'

    # Base URL of the player API; extractors in this module append
    # "/config/..." or "/collectionData/..." to it.
    _API_BASE = 'https://api.arte.tv/api/player/v1'
class ArteTVIE(ArteTVBaseIE):
    # Accepts both the public video pages and the player API config URLs.
    # The language is captured as `lang` for the former and `lang_2` for
    # the latter.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
                        )
                        /(?P<id>\d{6}-\d{3}-[AF])
                    ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
        'info_dict': {
            # NOTE(review): 'id' and 'ext' were not visible in the reviewed
            # excerpt; 'id' is taken from the URL above, 'ext' is assumed.
            'id': '088501-000-A',
            'ext': 'mp4',
            'title': 'Mexico: Stealing Petrol to Survive',
            'upload_date': '20190628',
        },
    }, {
        'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
        'only_matching': True,
    }, {
        'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    @staticmethod
    def _version_preferences(langcode):
        """Return the versionCode regexes for `langcode`, best match first.

        `langcode` is regex-escaped before being inserted into the patterns.
        """
        escaped = re.escape(langcode)
        # Language preference from most to least priority
        # Reference: section 6.8 of
        # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
        templates = (
            # original version in requested language, without subtitles
            r'VO{0}$',
            # original version in requested language, with partial subtitles in requested language
            r'VO{0}-ST{0}$',
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO{0}-STM{0}$',
            # non-original (dubbed) version in requested language, without subtitles
            r'V{0}$',
            # non-original (dubbed) version in requested language, with partial subtitles in requested language
            r'V{0}-ST{0}$',
            # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
            r'V{0}-STM{0}$',
            # original version in requested language, with partial subtitles in different language
            r'VO{0}-ST(?!{0}).+?$',
            # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO{0}-STM(?!{0}).+?$',
            # original version in different language, with partial subtitles in requested language
            r'VO(?:(?!{0}).+?)?-ST{0}$',
            # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
            r'VO(?:(?!{0}).+?)?-STM{0}$',
            # original version in different language, without subtitles
            r'VO(?:(?!{0}))?$',
            # original version in different language, with partial subtitles in different language
            r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$',
            # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
            r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$',
        )
        return tuple(template.format(escaped) for template in templates)

    @staticmethod
    def _language_preference(preferences, version_code):
        """Rank `version_code` against `preferences`; higher is better.

        The first pattern scores len(preferences), the last scores 1.
        """
        for rank, pattern in enumerate(preferences):
            if re.match(pattern, version_code):
                return len(preferences) - rank
        # NOTE(review): the fallback for an unmatched versionCode was not
        # visible in the reviewed excerpt; -1 is assumed - confirm.
        return -1

    def _real_extract(self, url):
        """Fetch the player config for `url` and build the info dict.

        Raises ExtractorError (expected) when the API response carries no
        'VSR' stream dictionary.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        lang = mobj.group('lang') or mobj.group('lang_2')

        info = self._download_json(
            '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
        player_info = info['videoJsonPlayer']

        vsr = try_get(player_info, lambda x: x['VSR'], dict)
        if not vsr:
            # Prefer the message supplied by the API; otherwise fall back
            # to a generic one.
            error = None
            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
                error = try_get(
                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
            if not error:
                # `%` binds tighter than `or`, so the fallback id has to be
                # chosen inside the parentheses.
                error = 'Video %s is not available' % (player_info.get('VID') or video_id)
            raise ExtractorError(error, expected=True)

        upload_date_str = player_info.get('shootingDate')
        if not upload_date_str:
            # Keep only the part of VRA/VDA before the first space.
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

        title = (player_info.get('VTI') or player_info['VID']).strip()
        # `or ''` also covers a 'VSU' key that is present but null.
        subtitle = (player_info.get('VSU') or '').strip()
        if subtitle:
            title += ' - %s' % subtitle

        qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])

        # NOTE(review): this mapping was not visible in the reviewed
        # excerpt and is restored from memory; presumably it maps the site
        # language to the code used inside versionCode - confirm.
        LANGS = {
            'fr': 'F',
            'de': 'A',
            'en': 'E[ANG]',
            'es': 'E[ESP]',
            'it': 'E[ITA]',
            'pl': 'E[POL]',
        }

        langcode = LANGS.get(lang, lang)
        # The patterns depend only on the language, so build them once
        # rather than once per format.
        preferences = self._version_preferences(langcode)

        formats = []
        for format_id, format_dict in vsr.items():
            f = dict(format_dict)
            format_url = url_or_none(f.get('url'))
            streamer = f.get('streamer')
            if not format_url and not streamer:
                continue

            # A missing versionCode must not crash re.match(); it simply
            # matches no preference.
            lang_pref = self._language_preference(
                preferences, f.get('versionCode') or '')
            format_note = '%s, %s' % (f.get('versionCode'), f.get('versionLibelle'))

            media_type = f.get('mediaType')
            if media_type == 'hls':
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False)
                for m3u8_format in m3u8_formats:
                    m3u8_format.update({
                        'language_preference': lang_pref,
                        'format_note': format_note,
                    })
                formats.extend(m3u8_formats)
                continue

            fmt = {
                'format_id': format_id,
                'language_preference': lang_pref,
                'format_note': format_note,
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'tbr': int_or_none(f.get('bitrate')),
                'quality': qfunc(f.get('quality')),
            }

            if media_type == 'rtmp':
                fmt['url'] = f['streamer']
                fmt['play_path'] = 'mp4:' + f['url']
                fmt['ext'] = 'flv'
            else:
                fmt['url'] = f['url']

            formats.append(fmt)

        # For this extractor, quality only represents the relative quality
        # with respect to other formats with the same resolution
        self._sort_formats(formats, ('res', 'quality'))

        return {
            'id': player_info.get('VID') or video_id,
            'title': title,
            'description': player_info.get('VDE') or player_info.get('V7T'),
            'upload_date': unified_strdate(upload_date_str),
            # try_get also covers a 'VTU' key that is present but null.
            'thumbnail': player_info.get('programImage') or try_get(
                player_info, lambda x: x['VTU']['IUR']),
            'formats': formats,
        }
class ArteTVEmbedIE(InfoExtractor):
    # Player URLs that carry the API config URL in `json_url`.
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
    _TESTS = [{
        'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
        'info_dict': {
            'id': '100605-013-A',
            # NOTE(review): 'ext' was not visible in the reviewed excerpt;
            # 'mp4' is assumed - confirm.
            'ext': 'mp4',
            'title': 'United we Stream November Lockdown Edition #13',
            'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
            'upload_date': '20201116',
        },
    }, {
        'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return every arte.tv player URL used as an iframe/script src in `webpage`."""
        embed_re = r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1'
        return [mobj.group('url') for mobj in re.finditer(embed_re, webpage)]

    def _real_extract(self, url):
        """Delegate to ArteTVIE using the URL found in the `json_url` parameter."""
        # NOTE(review): the line defining the parsed query was not visible
        # in the reviewed excerpt; restored as parse_qs(url) - confirm.
        json_url = parse_qs(url)['json_url'][0]
        return self.url_result(
            json_url,
            ie=ArteTVIE.ie_key(),
            video_id=ArteTVIE._match_id(json_url))
class ArteTVPlaylistIE(ArteTVBaseIE):
    # Collection pages: the id is "RC-" followed by six digits.
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
        'info_dict': {
            # NOTE(review): 'id' was not visible in the reviewed excerpt;
            # taken from the URL above.
            'id': 'RC-016954',
            'title': 'Earn a Living',
            'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the collection's `videos` list."""
        lang, playlist_id = self._match_valid_url(url).groups()
        api_url = '%s/collectionData/%s/%s?source=videos' % (
            self._API_BASE, lang, playlist_id)
        collection = self._download_json(api_url, playlist_id)

        def iter_entries():
            for item in collection['videos']:
                if not isinstance(item, dict):
                    continue
                # Prefer the page URL, fall back to the JSON config URL.
                target = url_or_none(item.get('url')) or url_or_none(item.get('jsonUrl'))
                if not target:
                    continue
                yield {
                    '_type': 'url_transparent',
                    'url': target,
                    'id': item.get('programId'),
                    'title': item.get('title'),
                    'alt_title': item.get('subtitle'),
                    'thumbnail': url_or_none(try_get(item, lambda x: x['mainImage']['url'], compat_str)),
                    'duration': int_or_none(item.get('durationSeconds')),
                    'view_count': int_or_none(item.get('views')),
                    'ie_key': ArteTVIE.ie_key(),
                }

        # Materialise eagerly so a list is handed to playlist_result().
        entries = list(iter_entries())
        return self.playlist_result(
            entries, playlist_id,
            collection.get('title'),
            collection.get('shortDescription') or collection.get('teaserText'))
class ArteTVCategoryIE(ArteTVBaseIE):
    # Any path below /<lang>/videos/. suitable() below excludes URLs that
    # ArteTVIE or ArteTVPlaylistIE already handle.
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    }]

    @classmethod
    def suitable(cls, url):
        """Match only URLs that the video and playlist extractors reject."""
        if any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE)):
            return False
        return super(ArteTVCategoryIE, cls).suitable(url)

    def _real_extract(self, url):
        """Collect the video and playlist links found on a category page."""
        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        link_re = r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang
        items = []
        for mobj in re.finditer(link_re, webpage):
            link = mobj.group('url')
            # Skip links back to the category page itself.
            if link == url:
                continue
            if any(ie.suitable(link) for ie in (ArteTVIE, ArteTVPlaylistIE)):
                items.append(link)

        # _html_search_regex() needs the text to search and a field name;
        # `or ''` keeps rsplit() from being called on None so that the
        # _generic_title() fallback stays reachable.
        title = (
            self._og_search_title(webpage, default=None)
            or self._html_search_regex(
                r'<title\b[^>]*>([^<]+)</title>', webpage, 'title', default=None)
            or '')
        # Keep only the part before the last '|'.
        title = strip_or_none(title.rsplit('|', 1)[0]) or self._generic_title(url)

        return self.playlist_from_matches(
            items, playlist_id=playlist_id, playlist_title=title,
            description=self._og_search_description(webpage, default=None))