X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/cce889b900b3fabc765a249a78c6836da6d657d6..61edf57f8f13f6dfd81154174e647eb5fdd26089:/yt_dlp/extractor/francetv.py diff --git a/yt_dlp/extractor/francetv.py b/yt_dlp/extractor/francetv.py index 92ce233ae..ab08f1c6b 100644 --- a/yt_dlp/extractor/francetv.py +++ b/yt_dlp/extractor/francetv.py @@ -1,70 +1,64 @@ -# coding: utf-8 - -from __future__ import unicode_literals - import re +import urllib.parse from .common import InfoExtractor -from ..compat import ( - compat_str, - compat_urlparse, -) +from .dailymotion import DailymotionIE +from ..networking import HEADRequest from ..utils import ( clean_html, determine_ext, - ExtractorError, + filter_dict, + format_field, int_or_none, - parse_duration, - try_get, + join_nonempty, + parse_iso8601, + smuggle_url, + unsmuggle_url, url_or_none, - urljoin, ) -from .dailymotion import DailymotionIE +from ..utils.traversal import traverse_obj class FranceTVBaseInfoExtractor(InfoExtractor): - def _make_url_result(self, video_or_full_id, catalog=None): - full_id = 'francetv:%s' % video_or_full_id - if '@' not in video_or_full_id and catalog: - full_id += '@%s' % catalog - return self.url_result( - full_id, ie=FranceTVIE.ie_key(), - video_id=video_or_full_id.split('@')[0]) + def _make_url_result(self, video_id, url=None): + video_id = video_id.split('@')[0] # for compat with old @catalog IDs + full_id = f'francetv:{video_id}' + if url: + full_id = smuggle_url(full_id, {'hostname': urllib.parse.urlparse(url).hostname}) + return self.url_result(full_id, FranceTVIE, video_id) class FranceTVIE(InfoExtractor): - _VALID_URL = r'''(?x) - (?: - https?:// - sivideo\.webservices\.francetelevisions\.fr/tools/getInfosOeuvre/v2/\? - .*?\bidDiffusion=[^&]+| - (?: - https?://videos\.francetv\.fr/video/| - francetv: - ) - (?P[^@]+)(?:@(?P.+))? - ) - ''' + _VALID_URL = r'francetv:(?P[^@#]+)' + _GEO_COUNTRIES = ['FR'] + _GEO_BYPASS = False _TESTS = [{ - # without catalog - 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=162311093&callback=_jsonp_loader_callback_request_0', - 'md5': 'c2248a8de38c4e65ea8fae7b5df2d84f', + # tokenized url is in dinfo['video']['token'] + 'url': 'francetv:ec217ecc-0733-48cf-ac06-af1347b849d1', 'info_dict': { - 'id': '162311093', + 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', - 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', 'timestamp': 1502623500, + 'duration': 2580, + 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20170813', }, + 'params': {'skip_download': 'm3u8'}, }, { - # with catalog - 'url': 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/?idDiffusion=NI_1004933&catalogue=Zouzous&callback=_jsonp_loader_callback_request_4', - 'only_matching': True, - }, { - 'url': 'http://videos.francetv.fr/video/NI_657393@Regions', - 'only_matching': True, + # tokenized url is in dinfo['video']['token']['akamai'] + 'url': 'francetv:c5bda21d-2c6f-4470-8849-3d8327adb2ba', + 'info_dict': { + 'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba', + 'ext': 'mp4', + 'title': '13h15, le dimanche... - Les mystères de Jésus', + 'timestamp': 1514118300, + 'duration': 2880, + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20171224', + }, + 'params': {'skip_download': 'm3u8'}, }, { 'url': 'francetv:162311093', 'only_matching': True, @@ -86,102 +80,108 @@ class FranceTVIE(InfoExtractor): 'only_matching': True, }] - def _extract_video(self, video_id, catalogue=None): - # Videos are identified by idDiffusion so catalogue part is optional. - # However when provided, some extra formats may be returned so we pass - # it if available. - info = self._download_json( - 'https://sivideo.webservices.francetelevisions.fr/tools/getInfosOeuvre/v2/', - video_id, 'Downloading video JSON', query={ - 'idDiffusion': video_id, - 'catalogue': catalogue or '', - }) - - if info.get('status') == 'NOK': - raise ExtractorError( - '%s returned error: %s' % (self.IE_NAME, info['message']), - expected=True) - allowed_countries = info['videos'][0].get('geoblocage') - if allowed_countries: - georestricted = True - geo_info = self._download_json( - 'http://geo.francetv.fr/ws/edgescape.json', video_id, - 'Downloading geo restriction info') - country = geo_info['reponse']['geo_info']['country_code'] - if country not in allowed_countries: - raise ExtractorError( - 'The video is not available from your location', - expected=True) - else: - georestricted = False - - def sign(manifest_url, manifest_id): - for host in ('hdfauthftv-a.akamaihd.net', 'hdfauth.francetv.fr'): - signed_url = url_or_none(self._download_webpage( - 'https://%s/esi/TA' % host, video_id, - 'Downloading signed %s manifest URL' % manifest_id, - fatal=False, query={ - 'url': manifest_url, - })) - if signed_url: - return signed_url - return manifest_url - + def _extract_video(self, video_id, hostname=None): is_live = None - videos = [] - - for video in (info.get('videos') or []): - if video.get('statut') != 'ONLINE': - continue - if not video.get('url'): + drm_formats = False + title = None + subtitle = None + episode_number = None + season_number = None + image = None + duration = None + timestamp = None + spritesheets = None + + # desktop+chrome returns dash; mobile+safari returns hls + for device_type, browser in [('desktop', 'chrome'), ('mobile', 'safari')]: + dinfo = self._download_json( + f'https://k7.ftven.fr/videos/{video_id}', video_id, + f'Downloading {device_type} {browser} video JSON', query=filter_dict({ + 'device_type': device_type, + 'browser': browser, + 'domain': hostname, + }), fatal=False, expected_status=422) # 422 json gives detailed error code/message + + if not dinfo: continue - videos.append(video) - - if not videos: - for device_type in ['desktop', 'mobile']: - fallback_info = self._download_json( - 'https://player.webservices.francetelevisions.fr/v1/videos/%s' % video_id, - video_id, 'Downloading fallback %s video JSON' % device_type, query={ - 'device_type': device_type, - 'browser': 'chrome', - }, fatal=False) - - if fallback_info and fallback_info.get('video'): - videos.append(fallback_info['video']) - - formats = [] - for video in videos: - video_url = video.get('url') - if not video_url: + + if video := traverse_obj(dinfo, ('video', {dict})): + videos.append(video) + if duration is None: + duration = video.get('duration') + if is_live is None: + is_live = video.get('is_live') + if spritesheets is None: + spritesheets = video.get('spritesheets') + elif code := traverse_obj(dinfo, ('code', {int})): + if code == 2009: + self.raise_geo_restricted(countries=self._GEO_COUNTRIES) + elif code in (2015, 2017): + # 2015: L'accès à cette vidéo est impossible. (DRM-only) + # 2017: Cette vidéo n'est pas disponible depuis le site web mobile (b/c DRM) + drm_formats = True + continue + self.report_warning( + f'{self.IE_NAME} said: {code} "{clean_html(dinfo.get("message"))}"') continue - if is_live is None: - is_live = (try_get( - video, lambda x: x['plages_ouverture'][0]['direct'], bool) is True - or video.get('is_live') is True - or '/live.francetv.fr/' in video_url) + + if meta := traverse_obj(dinfo, ('meta', {dict})): + if title is None: + title = meta.get('title') + # meta['pre_title'] contains season and episode number for series in format "S E" + season_number, episode_number = self._search_regex( + r'S(\d+)\s*E(\d+)', meta.get('pre_title'), 'episode info', group=(1, 2), default=(None, None)) + if subtitle is None: + subtitle = meta.get('additional_title') + if image is None: + image = meta.get('image_url') + if timestamp is None: + timestamp = parse_iso8601(meta.get('broadcasted_at')) + + if not videos and drm_formats: + self.report_drm(video_id) + + formats, subtitles, video_url = [], {}, None + for video in traverse_obj(videos, lambda _, v: url_or_none(v['url'])): + video_url = video['url'] format_id = video.get('format') + + if token_url := traverse_obj(video, ('token', (None, 'akamai'), {url_or_none}, any)): + tokenized_url = traverse_obj(self._download_json( + token_url, video_id, f'Downloading signed {format_id} manifest URL', + fatal=False, query={ + 'format': 'json', + 'url': video_url, + }), ('url', {url_or_none})) + if tokenized_url: + video_url = tokenized_url + ext = determine_ext(video_url) if ext == 'f4m': - if georestricted: - # See https://github.com/ytdl-org/youtube-dl/issues/3963 - # m3u8 urls work fine - continue formats.extend(self._extract_f4m_formats( - sign(video_url, format_id) + '&hdcore=3.7.0&plugin=aasp-3.7.0.39.44', - video_id, f4m_id=format_id, fatal=False)) + video_url, video_id, f4m_id=format_id or ext, fatal=False)) elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - sign(video_url, format_id), video_id, 'mp4', - entry_protocol='m3u8_native', m3u8_id=format_id, - fatal=False)) + format_id = format_id or 'hls' + fmts, subs = self._extract_m3u8_formats_and_subtitles( + video_url, video_id, 'mp4', m3u8_id=format_id, fatal=False) + for f in traverse_obj(fmts, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None): + if mobj := re.match(rf'{format_id}-[Aa]udio-\w+-(?P\d+)', f['format_id']): + f.update({ + 'tbr': int_or_none(mobj.group('bitrate')), + 'acodec': 'mp4a', + }) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif ext == 'mpd': - formats.extend(self._extract_mpd_formats( - sign(video_url, format_id), video_id, mpd_id=format_id, fatal=False)) + fmts, subs = self._extract_mpd_formats_and_subtitles( + video_url, video_id, mpd_id=format_id or 'dash', fatal=False) + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) elif video_url.startswith('rtmp'): formats.append({ 'url': video_url, - 'format_id': 'rtmp-%s' % format_id, + 'format_id': join_nonempty('rtmp', format_id), 'ext': 'flv', }) else: @@ -191,47 +191,60 @@ def sign(manifest_url, manifest_id): 'format_id': format_id, }) - self._sort_formats(formats) - - title = info['titre'] - subtitle = info.get('sous_titre') - if subtitle: - title += ' - %s' % subtitle - title = title.strip() - - subtitles = {} - subtitles_list = [{ - 'url': subformat['url'], - 'ext': subformat.get('format'), - } for subformat in info.get('subtitles', []) if subformat.get('url')] - if subtitles_list: - subtitles['fr'] = subtitles_list + # XXX: what is video['captions']? + + if not formats and video_url: + urlh = self._request_webpage( + HEADRequest(video_url), video_id, 'Checking for geo-restriction', + fatal=False, expected_status=403) + if urlh and urlh.headers.get('x-errortype') == 'geo': + self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) + + for f in formats: + if f.get('acodec') != 'none' and f.get('language') in ('qtz', 'qad'): + f['language_preference'] = -10 + f['format_note'] = 'audio description{}'.format(format_field(f, 'format_note', ', %s')) + + if spritesheets: + formats.append({ + 'format_id': 'spritesheets', + 'format_note': 'storyboard', + 'acodec': 'none', + 'vcodec': 'none', + 'ext': 'mhtml', + 'protocol': 'mhtml', + 'url': 'about:invalid', + 'fragments': [{ + 'url': sheet, + # XXX: not entirely accurate; each spritesheet seems to be + # a 10x10 grid of thumbnails corresponding to approximately + # 2 seconds of the video; the last spritesheet may be shorter + 'duration': 200, + } for sheet in traverse_obj(spritesheets, (..., {url_or_none}))], + }) return { 'id': video_id, - 'title': self._live_title(title) if is_live else title, - 'description': clean_html(info.get('synopsis')), - 'thumbnail': urljoin('https://sivideo.webservices.francetelevisions.fr', info.get('image')), - 'duration': int_or_none(info.get('real_duration')) or parse_duration(info.get('duree')), - 'timestamp': int_or_none(try_get(info, lambda x: x['diffusion']['timestamp'])), + 'title': join_nonempty(title, subtitle, delim=' - ').strip(), + 'thumbnail': image, + 'duration': duration, + 'timestamp': timestamp, 'is_live': is_live, 'formats': formats, 'subtitles': subtitles, + 'episode': subtitle if episode_number else None, + 'series': title if episode_number else None, + 'episode_number': int_or_none(episode_number), + 'season_number': int_or_none(season_number), + '_format_sort_fields': ('res', 'tbr', 'proto'), # prioritize m3u8 over dash } def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - catalog = mobj.group('catalog') - - if not video_id: - qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - video_id = qs.get('idDiffusion', [None])[0] - catalog = qs.get('catalogue', [None])[0] - if not video_id: - raise ExtractorError('Invalid URL', expected=True) + url, smuggled_data = unsmuggle_url(url, {}) + video_id = self._match_id(url) + hostname = smuggled_data.get('hostname') or 'www.france.tv' - return self._extract_video(video_id, catalog) + return self._extract_video(video_id, hostname=hostname) class FranceTVSiteIE(FranceTVBaseInfoExtractor): @@ -240,17 +253,55 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', + 'id': 'c5bda21d-2c6f-4470-8849-3d8327adb2ba', 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', - 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', - 'timestamp': 1502623500, - 'upload_date': '20170813', + 'timestamp': 1514118300, + 'duration': 2880, + 'thumbnail': r're:^https?://.*\.jpg$', + 'upload_date': '20171224', }, 'params': { 'skip_download': True, }, 'add_ie': [FranceTVIE.ie_key()], + }, { + # geo-restricted + 'url': 'https://www.france.tv/enfants/six-huit-ans/foot2rue/saison-1/3066387-duel-au-vieux-port.html', + 'info_dict': { + 'id': 'a9050959-eedd-4b4a-9b0d-de6eeaa73e44', + 'ext': 'mp4', + 'title': 'Foot2Rue - Duel au vieux port', + 'episode': 'Duel au vieux port', + 'series': 'Foot2Rue', + 'episode_number': 1, + 'season_number': 1, + 'timestamp': 1642761360, + 'upload_date': '20220121', + 'season': 'Season 1', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 1441, + }, + }, { + # geo-restricted livestream (workflow == 'token-akamai') + 'url': 'https://www.france.tv/france-4/direct.html', + 'info_dict': { + 'id': '9a6a7670-dde9-4264-adbc-55b89558594b', + 'ext': 'mp4', + 'title': r're:France 4 en direct .+', + 'live_status': 'is_live', + }, + 'skip': 'geo-restricted livestream', + }, { + # livestream (workflow == 'dai') + 'url': 'https://www.france.tv/france-2/direct.html', + 'info_dict': { + 'id': '006194ea-117d-4bcf-94a9-153d999c59ae', + 'ext': 'mp4', + 'title': r're:France 2 en direct .+', + 'live_status': 'is_live', + }, + 'params': {'skip_download': 'livestream'}, }, { # france3 'url': 'https://www.france.tv/france-3/des-chiffres-et-des-lettres/139063-emission-du-mardi-9-mai-2017.html', @@ -267,10 +318,6 @@ class FranceTVSiteIE(FranceTVBaseInfoExtractor): # franceo 'url': 'https://www.france.tv/france-o/archipels/132249-mon-ancetre-l-esclave.html', 'only_matching': True, - }, { - # france2 live - 'url': 'https://www.france.tv/france-2/direct.html', - 'only_matching': True, }, { 'url': 'https://www.france.tv/documentaires/histoire/136517-argentine-les-500-bebes-voles-de-la-dictature.html', 'only_matching': True, @@ -294,46 +341,16 @@ def _real_extract(self, url): webpage = self._download_webpage(url, display_id) - catalogue = None video_id = self._search_regex( r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: - video_id, catalogue = self._html_search_regex( - r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@]+@[^"]+)"', - webpage, 'video ID').split('@') - - return self._make_url_result(video_id, catalogue) + video_id = self._html_search_regex( + r'(?:href=|player\.setVideo\(\s*)"http://videos?\.francetv\.fr/video/([^@"]+@[^"]+)"', + webpage, 'video ID') - -class FranceTVEmbedIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://embed\.francetv\.fr/*\?.*?\bue=(?P[^&]+)' - - _TESTS = [{ - 'url': 'http://embed.francetv.fr/?ue=7fd581a2ccf59d2fc5719c5c13cf6961', - 'info_dict': { - 'id': 'NI_983319', - 'ext': 'mp4', - 'title': 'Le Pen Reims', - 'upload_date': '20170505', - 'timestamp': 1493981780, - 'duration': 16, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - video = self._download_json( - 'http://api-embed.webservices.francetelevisions.fr/key/%s' % video_id, - video_id) - - return self._make_url_result(video['video_id'], video.get('catalog')) + return self._make_url_result(video_id, url=url) class FranceTVInfoIE(FranceTVBaseInfoExtractor): @@ -347,8 +364,9 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): 'ext': 'mp4', 'title': 'Soir 3', 'upload_date': '20190822', - 'timestamp': 1566510900, - 'description': 'md5:72d167097237701d6e8452ff03b83c00', + 'timestamp': 1566510730, + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'duration': 1637, 'subtitles': { 'fr': 'mincount:2', }, @@ -357,6 +375,22 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): 'skip_download': True, }, 'add_ie': [FranceTVIE.ie_key()], + }, { + 'note': 'Only an image exists in initial webpage instead of the video', + 'url': 'https://www.francetvinfo.fr/sante/maladie/coronavirus/covid-19-en-inde-une-situation-catastrophique-a-new-dehli_4381095.html', + 'info_dict': { + 'id': '7d204c9e-a2d3-11eb-9e4c-000d3a23d482', + 'ext': 'mp4', + 'title': 'Covid-19 : une situation catastrophique à New Dehli - Édition du mercredi 21 avril 2021', + 'thumbnail': r're:^https?://.*\.jpe?g$', + 'duration': 76, + 'timestamp': 1619028518, + 'upload_date': '20210421', + }, + 'params': { + 'skip_download': True, + }, + 'add_ie': [FranceTVIE.ie_key()], }, { 'url': 'http://www.francetvinfo.fr/elections/europeennes/direct-europeennes-regardez-le-debat-entre-les-candidats-a-la-presidence-de-la-commission_600639.html', 'only_matching': True, @@ -374,16 +408,26 @@ class FranceTVInfoIE(FranceTVBaseInfoExtractor): 'id': 'x4iiko0', 'ext': 'mp4', 'title': 'NDDL, référendum, Brexit : Cécile Duflot répond à Patrick Cohen', - 'description': 'Au lendemain de la victoire du "oui" au référendum sur l\'aéroport de Notre-Dame-des-Landes, l\'ancienne ministre écologiste est l\'invitée de Patrick Cohen. Plus d\'info : https://www.franceinter.fr/emissions/le-7-9/le-7-9-27-juin-2016', + 'description': 'md5:fdcb582c370756293a65cdfbc6ecd90e', 'timestamp': 1467011958, - 'upload_date': '20160627', 'uploader': 'France Inter', 'uploader_id': 'x2q2ez', + 'upload_date': '20160627', + 'view_count': int, + 'tags': ['Politique', 'France Inter', '27 juin 2016', 'Linvité de 8h20', 'Cécile Duflot', 'Patrick Cohen'], + 'age_limit': 0, + 'duration': 640, + 'like_count': int, + 'thumbnail': r're:https://[^/?#]+/v/[^/?#]+/x1080', }, 'add_ie': ['Dailymotion'], }, { 'url': 'http://france3-regions.francetvinfo.fr/limousin/emissions/jt-1213-limousin', 'only_matching': True, + }, { + # "
]+href="(?:https?:)?//videos\.francetv\.fr/video/([^@]+@[^"]+)"', - r'data-id=["\']([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'), + r'(?:data-id|[^/?#&]+)' - _TESTS = [{ - 'url': 'https://sport.francetvinfo.fr/les-jeux-olympiques/retour-sur-les-meilleurs-moments-de-pyeongchang-2018', - 'info_dict': { - 'id': '6e49080e-3f45-11e8-b459-000d3a2439ea', - 'ext': 'mp4', - 'title': 'Retour sur les meilleurs moments de Pyeongchang 2018', - 'timestamp': 1523639962, - 'upload_date': '20180413', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - video_id = self._search_regex(r'data-video="([^"]+)"', webpage, 'video_id') - return self._make_url_result(video_id, 'Sport-web') - - -class GenerationWhatIE(InfoExtractor): - IE_NAME = 'france2.fr:generation-what' - _VALID_URL = r'https?://generation-what\.francetv\.fr/[^/]+/video/(?P[^/?#&]+)' - - _TESTS = [{ - 'url': 'http://generation-what.francetv.fr/portrait/video/present-arms', - 'info_dict': { - 'id': 'wtvKYUG45iw', - 'ext': 'mp4', - 'title': 'Generation What - Garde à vous - FRA', - 'uploader': 'Generation What', - 'uploader_id': 'UCHH9p1eetWCgt4kXBYCb3_w', - 'upload_date': '20160411', - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': ['Youtube'], - }, { - 'url': 'http://generation-what.francetv.fr/europe/video/present-arms', - 'only_matching': True, - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - youtube_id = self._search_regex( - r"window\.videoURL\s*=\s*'([0-9A-Za-z_-]{11})';", - webpage, 'youtube id') - - return self.url_result(youtube_id, ie='Youtube', video_id=youtube_id) - - -class CultureboxIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?:[^/]+/)*(?P[^/?#&]+)' - - _TESTS = [{ - 'url': 'https://culturebox.francetvinfo.fr/opera-classique/musique-classique/c-est-baroque/concerts/cantates-bwv-4-106-et-131-de-bach-par-raphael-pichon-57-268689', - 'info_dict': { - 'id': 'EV_134885', - 'ext': 'mp4', - 'title': 'Cantates BWV 4, 106 et 131 de Bach par Raphaël Pichon 5/7', - 'description': 'md5:19c44af004b88219f4daa50fa9a351d4', - 'upload_date': '20180206', - 'timestamp': 1517945220, - 'duration': 5981, - }, - 'params': { - 'skip_download': True, - }, - 'add_ie': [FranceTVIE.ie_key()], - }] - - def _real_extract(self, url): - display_id = self._match_id(url) - - webpage = self._download_webpage(url, display_id) - - if ">Ce live n'est plus disponible en replay<" in webpage: - raise ExtractorError( - 'Video %s is not available' % display_id, expected=True) - - video_id, catalogue = self._search_regex( - r'["\'>]https?://videos\.francetv\.fr/video/([^@]+@.+?)["\'<]', - webpage, 'video id').split('@') - - return self._make_url_result(video_id, catalogue) - - -class FranceTVJeunesseIE(FranceTVBaseInfoExtractor): - _VALID_URL = r'(?Phttps?://(?:www\.)?(?:zouzous|ludo)\.fr/heros/(?P[^/?#&]+))' - - _TESTS = [{ - 'url': 'https://www.zouzous.fr/heros/simon', - 'info_dict': { - 'id': 'simon', - }, - 'playlist_count': 9, - }, { - 'url': 'https://www.ludo.fr/heros/ninjago', - 'info_dict': { - 'id': 'ninjago', - }, - 'playlist_count': 10, - }, { - 'url': 'https://www.zouzous.fr/heros/simon?abc', - 'only_matching': True, - }] - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') - - playlist = self._download_json( - '%s/%s' % (mobj.group('url'), 'playlist'), playlist_id) - - if not playlist.get('count'): - raise ExtractorError( - '%s is not available' % playlist_id, expected=True) - - entries = [] - for item in playlist['items']: - identity = item.get('identity') - if identity and isinstance(identity, compat_str): - entries.append(self._make_url_result(identity)) - - return self.playlist_result(entries, playlist_id) + return self._make_url_result(video_id, url=url)