2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
class ArteTVBaseIE(InfoExtractor):
    """Constants shared by the ARTE extractors defined in this module."""

    # Root URL of the ARTE player API; endpoint paths are appended to it
    # when the extractors build their JSON request URLs.
    _API_BASE = 'https://api.arte.tv/api/player/v1'

    # Site language codes written as a regex alternation, so that it can be
    # interpolated directly into the _VALID_URL patterns of the extractors.
    _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'
26 class ArteTVIE(ArteTVBaseIE
):
30 (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
31 api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
33 /(?P<id>\d{6}-\d{3}-[AF])
34 ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
36 'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
40 'title': 'Mexico: Stealing Petrol to Survive',
41 'upload_date': '20190628',
44 'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
45 'only_matching': True,
47 'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
48 'only_matching': True,
51 def _real_extract(self
, url
):
52 mobj
= self
._match
_valid
_url
(url
)
53 video_id
= mobj
.group('id')
54 lang
= mobj
.group('lang') or mobj
.group('lang_2')
56 info
= self
._download
_json
(
57 '%s/config/%s/%s' % (self
._API
_BASE
, lang
, video_id
), video_id
)
58 player_info
= info
['videoJsonPlayer']
60 vsr
= try_get(player_info
, lambda x
: x
['VSR'], dict)
63 if try_get(player_info
, lambda x
: x
['custom_msg']['type']) == 'error':
65 player_info
, lambda x
: x
['custom_msg']['msg'], compat_str
)
67 error
= 'Video %s is not available' % player_info
.get('VID') or video_id
68 raise ExtractorError(error
, expected
=True)
70 upload_date_str
= player_info
.get('shootingDate')
71 if not upload_date_str
:
72 upload_date_str
= (player_info
.get('VRA') or player_info
.get('VDA') or '').split(' ')[0]
74 title
= (player_info
.get('VTI') or player_info
['VID']).strip()
75 subtitle
= player_info
.get('VSU', '').strip()
77 title
+= ' - %s' % subtitle
79 qfunc
= qualities(['MQ', 'HQ', 'EQ', 'SQ'])
90 langcode
= LANGS
.get(lang
, lang
)
93 for format_id
, format_dict
in vsr
.items():
95 format_url
= url_or_none(f
.get('url'))
96 streamer
= f
.get('streamer')
97 if not format_url
and not streamer
:
99 versionCode
= f
.get('versionCode')
100 l
= re
.escape(langcode
)
102 # Language preference from most to least priority
103 # Reference: section 6.8 of
104 # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
106 # original version in requested language, without subtitles
108 # original version in requested language, with partial subtitles in requested language
109 r
'VO{0}-ST{0}$'.format(l
),
110 # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
111 r
'VO{0}-STM{0}$'.format(l
),
112 # non-original (dubbed) version in requested language, without subtitles
114 # non-original (dubbed) version in requested language, with partial subtitles in requested language
115 r
'V{0}-ST{0}$'.format(l
),
116 # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
117 r
'V{0}-STM{0}$'.format(l
),
118 # original version in requested language, with partial subtitles in different language
119 r
'VO{0}-ST(?!{0}).+?$'.format(l
),
120 # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
121 r
'VO{0}-STM(?!{0}).+?$'.format(l
),
122 # original version in different language, with partial subtitles in requested language
123 r
'VO(?:(?!{0}).+?)?-ST{0}$'.format(l
),
124 # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
125 r
'VO(?:(?!{0}).+?)?-STM{0}$'.format(l
),
126 # original version in different language, without subtitles
127 r
'VO(?:(?!{0}))?$'.format(l
),
128 # original version in different language, with partial subtitles in different language
129 r
'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l
),
130 # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
131 r
'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l
),
134 for pref
, p
in enumerate(PREFERENCES
):
135 if re
.match(p
, versionCode
):
136 lang_pref
= len(PREFERENCES
) - pref
141 media_type
= f
.get('mediaType')
142 if media_type
== 'hls':
143 m3u8_formats
= self
._extract
_m
3u8_formats
(
144 format_url
, video_id
, 'mp4', entry_protocol
='m3u8_native',
145 m3u8_id
=format_id
, fatal
=False)
146 for m3u8_format
in m3u8_formats
:
147 m3u8_format
['language_preference'] = lang_pref
148 formats
.extend(m3u8_formats
)
152 'format_id': format_id
,
153 'language_preference': lang_pref
,
154 'format_note': '%s, %s' % (f
.get('versionCode'), f
.get('versionLibelle')),
155 'width': int_or_none(f
.get('width')),
156 'height': int_or_none(f
.get('height')),
157 'tbr': int_or_none(f
.get('bitrate')),
158 'quality': qfunc(f
.get('quality')),
161 if media_type
== 'rtmp':
162 format
['url'] = f
['streamer']
163 format
['play_path'] = 'mp4:' + f
['url']
164 format
['ext'] = 'flv'
166 format
['url'] = f
['url']
168 formats
.append(format
)
170 # For this extractor, quality only represents the relative quality
171 # with respect to other formats with the same resolution
172 self
._sort
_formats
(formats
, ('res', 'quality'))
175 'id': player_info
.get('VID') or video_id
,
177 'description': player_info
.get('VDE') or player_info
.get('V7T'),
178 'upload_date': unified_strdate(upload_date_str
),
179 'thumbnail': player_info
.get('programImage') or player_info
.get('VTU', {}).get('IUR'),
184 class ArteTVEmbedIE(InfoExtractor
):
185 _VALID_URL
= r
'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
187 'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
189 'id': '100605-013-A',
191 'title': 'United we Stream November Lockdown Edition #13',
192 'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
193 'upload_date': '20201116',
196 'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
197 'only_matching': True,
201 def _extract_urls(webpage
):
202 return [url
for _
, url
in re
.findall(
203 r
'<(?:iframe|script)[^>]+src=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:www\
.)?arte\
.tv
/player
/v\d
+/index\
.php
\?.*?
\bjson
_url
=.+?
)\
1',
206 def _real_extract(self, url):
208 json_url = qs['json_url
'][0]
209 video_id = ArteTVIE._match_id(json_url)
210 return self.url_result(
211 json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
214 class ArteTVPlaylistIE(ArteTVBaseIE):
215 _VALID_URL = r'https?
://(?
:www\
.)?arte\
.tv
/(?P
<lang
>%s)/videos
/(?P
<id>RC
-\d{6}
)' % ArteTVBaseIE._ARTE_LANGUAGES
217 'url
': 'https
://www
.arte
.tv
/en
/videos
/RC
-016954/earn
-a
-living
/',
220 'title
': 'Earn a Living
',
221 'description
': 'md5
:d322c55011514b3a7241f7fb80d494c2
',
223 'playlist_mincount
': 6,
225 'url
': 'https
://www
.arte
.tv
/pl
/videos
/RC
-014123/arte
-reportage
/',
226 'only_matching
': True,
229 def _real_extract(self, url):
230 lang, playlist_id = self._match_valid_url(url).groups()
231 collection = self._download_json(
232 '%s/collectionData
/%s/%s?source
=videos
'
233 % (self._API_BASE, lang, playlist_id), playlist_id)
235 for video in collection['videos
']:
236 if not isinstance(video, dict):
238 video_url = url_or_none(video.get('url
')) or url_or_none(video.get('jsonUrl
'))
241 video_id = video.get('programId
')
243 '_type
': 'url_transparent
',
246 'title
': video.get('title
'),
247 'alt_title
': video.get('subtitle
'),
248 'thumbnail
': url_or_none(try_get(video, lambda x: x['mainImage
']['url
'], compat_str)),
249 'duration
': int_or_none(video.get('durationSeconds
')),
250 'view_count
': int_or_none(video.get('views
')),
251 'ie_key
': ArteTVIE.ie_key(),
253 title = collection.get('title
')
254 description = collection.get('shortDescription
') or collection.get('teaserText
')
255 return self.playlist_result(entries, playlist_id, title, description)