[yt-dlp.git] / yt_dlp / extractor / arte.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_str,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    qualities,
    strip_or_none,
    try_get,
    unified_strdate,
    url_or_none,
)


class ArteTVBaseIE(InfoExtractor):
    # Common constants shared by the arte.tv extractors in this module.

    # Root of the player API; the extractors append the endpoint path to it
    _API_BASE = 'https://api.arte.tv/api/player/v1'
    # Regex alternation of the site language codes, interpolated into the
    # `_VALID_URL` patterns of the subclasses
    _ARTE_LANGUAGES = 'fr|de|en|es|it|pl'


class ArteTVIE(ArteTVBaseIE):
    # Extractor for a single arte.tv programme, addressed either through its
    # public video page or through the player API config URL.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos|
                            api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
                        )
                        /(?P<id>\d{6}-\d{3}-[AF])
                    ''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
        'info_dict': {
            'id': '088501-000-A',
            'ext': 'mp4',
            'title': 'Mexico: Stealing Petrol to Survive',
            'upload_date': '20190628',
        },
    }, {
        'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
        'only_matching': True,
    }, {
        'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    # Site language (as captured from the URL) -> language marker used inside
    # the `versionCode` values of the API response
    _VERSION_LANG_CODES = {
        'fr': 'F',
        'de': 'A',
        'en': 'E[ANG]',
        'es': 'E[ESP]',
        'it': 'E[ITA]',
        'pl': 'E[POL]',
    }

    # Language preference from most to least priority; `{0}` is replaced by
    # the regex-escaped language marker of the requested language.
    # Reference: section 6.8 of
    # https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
    _LANG_PREFERENCE_TEMPLATES = (
        # original version in requested language, without subtitles
        r'VO{0}$',
        # original version in requested language, with partial subtitles in requested language
        r'VO{0}-ST{0}$',
        # original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
        r'VO{0}-STM{0}$',
        # non-original (dubbed) version in requested language, without subtitles
        r'V{0}$',
        # non-original (dubbed) version in requested language, with partial subtitles in requested language
        r'V{0}-ST{0}$',
        # non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
        r'V{0}-STM{0}$',
        # original version in requested language, with partial subtitles in different language
        r'VO{0}-ST(?!{0}).+?$',
        # original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
        r'VO{0}-STM(?!{0}).+?$',
        # original version in different language, with partial subtitles in requested language
        r'VO(?:(?!{0}).+?)?-ST{0}$',
        # original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
        r'VO(?:(?!{0}).+?)?-STM{0}$',
        # original version in different language, without subtitles
        # NOTE(review): the optional group holds only a lookahead, so this
        # pattern matches a bare 'VO' and nothing longer - confirm whether
        # `(?:(?!{0}).+?)?` was intended
        r'VO(?:(?!{0}))?$',
        # original version in different language, with partial subtitles in different language
        r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$',
        # original version in different language, with subtitles for the deaf and hard-of-hearing in different language
        r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$',
    )

    @classmethod
    def _language_preference_patterns(cls, langcode):
        """Return the `versionCode` regexes for `langcode`, most preferred first."""
        escaped = re.escape(langcode)
        return tuple(
            template.format(escaped) for template in cls._LANG_PREFERENCE_TEMPLATES)

    @staticmethod
    def _language_preference(patterns, version_code):
        """Rank `version_code` against `patterns`.

        Returns `len(patterns) - index` of the first matching pattern, so an
        earlier pattern yields a larger number, or -1 when nothing matches
        (which includes a missing `version_code`).
        """
        if not version_code:
            # re.match() would raise TypeError on None
            return -1
        for index, pattern in enumerate(patterns):
            if re.match(pattern, version_code):
                return len(patterns) - index
        return -1

    def _real_extract(self, url):
        """Fetch the player config for `url` and build the info dict.

        Returns a dict with `id`, `title`, `description`, `upload_date`,
        `thumbnail` and `formats`.

        Raises ExtractorError (expected) when the response carries no usable
        `VSR` mapping; the API's own error message is used when it provides
        one.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        # Exactly one of the two language groups is set, depending on which
        # URL alternative matched
        lang = mobj.group('lang') or mobj.group('lang_2')

        info = self._download_json(
            '%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
        player_info = info['videoJsonPlayer']

        vsr = try_get(player_info, lambda x: x['VSR'], dict)
        if not vsr:
            error = None
            if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
                error = try_get(
                    player_info, lambda x: x['custom_msg']['msg'], compat_str)
            if not error:
                # Parenthesised on purpose: `%` binds tighter than `or`, so
                # without the parentheses the video_id fallback never applies
                error = 'Video %s is not available' % (player_info.get('VID') or video_id)
            raise ExtractorError(error, expected=True)

        upload_date_str = player_info.get('shootingDate')
        if not upload_date_str:
            # Keep only the part before the first space of VRA/VDA
            upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]

        title = (player_info.get('VTI') or player_info['VID']).strip()
        # `or ''` also covers a key that is present but null
        subtitle = (player_info.get('VSU') or '').strip()
        if subtitle:
            title += ' - %s' % subtitle

        qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])

        langcode = self._VERSION_LANG_CODES.get(lang, lang)
        # Built once per video: the patterns depend only on the requested language
        preferences = self._language_preference_patterns(langcode)

        formats = []
        for format_id, format_dict in vsr.items():
            f = dict(format_dict)
            format_url = url_or_none(f.get('url'))
            streamer = f.get('streamer')
            if not format_url and not streamer:
                continue

            lang_pref = self._language_preference(preferences, f.get('versionCode'))

            media_type = f.get('mediaType')
            if media_type == 'hls':
                if not format_url:
                    # Nothing to download the manifest from
                    continue
                m3u8_formats = self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_id, fatal=False)
                for m3u8_format in m3u8_formats:
                    m3u8_format['language_preference'] = lang_pref
                formats.extend(m3u8_formats)
                continue

            fmt = {
                'format_id': format_id,
                'language_preference': lang_pref,
                'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
                'width': int_or_none(f.get('width')),
                'height': int_or_none(f.get('height')),
                'tbr': int_or_none(f.get('bitrate')),
                'quality': qfunc(f.get('quality')),
            }

            if media_type == 'rtmp':
                play_path = f.get('url')
                if not streamer or not play_path:
                    # An rtmp entry needs both parts; skip instead of raising KeyError
                    continue
                fmt['url'] = streamer
                fmt['play_path'] = 'mp4:' + play_path
                fmt['ext'] = 'flv'
            else:
                if not format_url:
                    # Only a streamer was given for a non-rtmp entry: no usable URL
                    continue
                fmt['url'] = format_url

            formats.append(fmt)

        # For this extractor, quality only represents the relative quality
        # with respect to other formats with the same resolution
        self._sort_formats(formats, ('res', 'quality'))

        return {
            'id': player_info.get('VID') or video_id,
            'title': title,
            'description': player_info.get('VDE') or player_info.get('V7T'),
            'upload_date': unified_strdate(upload_date_str),
            # try_get tolerates a missing, null or non-dict `VTU`
            'thumbnail': player_info.get('programImage') or try_get(player_info, lambda x: x['VTU']['IUR']),
            'formats': formats,
        }


class ArteTVEmbedIE(InfoExtractor):
    # Handles the embeddable arte.tv player page; its `json_url` query
    # parameter holds the address that is handed over to ArteTVIE.
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
    _TESTS = [{
        'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
        'info_dict': {
            'id': '100605-013-A',
            'ext': 'mp4',
            'title': 'United we Stream November Lockdown Edition #13',
            'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
            'upload_date': '20201116',
        },
    }, {
        'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the player URLs found in the `src` of iframe/script tags of `webpage`."""
        embed_re = r'<(?:iframe|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1'
        return [mobj.group('url') for mobj in re.finditer(embed_re, webpage)]

    def _real_extract(self, url):
        """Hand the URL stored in the `json_url` parameter over to ArteTVIE."""
        json_url = parse_qs(url)['json_url'][0]
        return self.url_result(
            json_url, ie=ArteTVIE.ie_key(),
            video_id=ArteTVIE._match_id(json_url))


class ArteTVPlaylistIE(ArteTVBaseIE):
    # Extractor for arte.tv collections (`RC-` ids); each listed video is
    # delegated to ArteTVIE.
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
        'info_dict': {
            'id': 'RC-016954',
            'title': 'Earn a Living',
            'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the collection data for `url` and return it as a playlist."""
        lang, playlist_id = self._match_valid_url(url).groups()
        api_url = (
            '%s/collectionData/%s/%s?source=videos'
            % (self._API_BASE, lang, playlist_id))
        collection = self._download_json(api_url, playlist_id)

        entries = []
        for item in collection['videos']:
            if not isinstance(item, dict):
                continue
            # Prefer the page URL and fall back to the JSON config URL
            target_url = url_or_none(item.get('url')) or url_or_none(item.get('jsonUrl'))
            if target_url:
                entries.append({
                    '_type': 'url_transparent',
                    'url': target_url,
                    'id': item.get('programId'),
                    'title': item.get('title'),
                    'alt_title': item.get('subtitle'),
                    'thumbnail': url_or_none(try_get(item, lambda x: x['mainImage']['url'], compat_str)),
                    'duration': int_or_none(item.get('durationSeconds')),
                    'view_count': int_or_none(item.get('views')),
                    'ie_key': ArteTVIE.ie_key(),
                })

        return self.playlist_result(
            entries, playlist_id, collection.get('title'),
            collection.get('shortDescription') or collection.get('teaserText'))


class ArteTVCategoryIE(ArteTVBaseIE):
    # Extractor for arte.tv category pages: collects the video and
    # collection links found in the page's HTML into a playlist.
    _VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+)*)/?\s*$' % ArteTVBaseIE._ARTE_LANGUAGES
    _TESTS = [{
        'url': 'https://www.arte.tv/en/videos/politics-and-society/',
        'info_dict': {
            'id': 'politics-and-society',
            'title': 'Politics and society',
            'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
        },
        'playlist_mincount': 13,
    }]

    @classmethod
    def suitable(cls, url):
        """Accept `url` only when neither ArteTVIE nor ArteTVPlaylistIE claims it."""
        return (
            not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
            and super(ArteTVCategoryIE, cls).suitable(url))

    def _real_extract(self, url):
        """Download the category page and return its linked videos as a playlist."""
        lang, playlist_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, playlist_id)

        items = []
        for mobj in re.finditer(
                r'<a\b[^>]*?href\s*=\s*(?P<q>"|\'|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
                webpage):
            video_url = mobj.group('url')
            if video_url == url:
                # Skip links back to the category page itself
                continue
            if any(ie.suitable(video_url) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
                items.append(video_url)

        # The <title> fallback must be given the page and a field name;
        # without them the call raises TypeError instead of searching
        title = (self._og_search_title(webpage, default=None)
                 or self._html_search_regex(
                     r'<title\b[^>]*>([^<]+)</title>', webpage, 'title', default=None))
        # Keep the part before the last '|'; `or ''` keeps the generic-title
        # fallback reachable when no title was found at all
        title = strip_or_none((title or '').rsplit('|', 1)[0]) or self._generic_title(url)

        return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
                                          description=self._og_search_description(webpage, default=None))
Commit	Line	Data
dcdb292f	1	# coding: utf-8
3798eadc PH	2	from __future__ import unicode_literals
3798eadc PH	3
d5822b96	4	import re
d5822b96 PH	5
d5822b96 PH	6	from .common import InfoExtractor
8bdd16b4	7	from ..compat import (
8bdd16b4	8	compat_str,
8bdd16b4	9	)
d5822b96	10	from ..utils import (
c0892b2b	11	ExtractorError,
d24a2b20	12	int_or_none,
4dfbf869	13	parse_qs,
aff2f4f4	14	qualities,
50e93e03	15	strip_or_none,
8cc1840c	16	try_get,
c0892b2b	17	unified_strdate,
8bdd16b4	18	url_or_none,
d5822b96 PH	19	)
d5822b96 PH	20
d5822b96	21
6e6b9f60	22	class ArteTVBaseIE(InfoExtractor):
8bdd16b4	23	_ARTE_LANGUAGES = 'fr\|de\|en\|es\|it\|pl'
	24	_API_BASE = 'https://api.arte.tv/api/player/v1'
	25
	26
	27	class ArteTVIE(ArteTVBaseIE):
	28	_VALID_URL = r'''(?x)
	29	https?://
	30	(?:
	31	(?:www\.)?arte\.tv/(?P<lang>%(langs)s)/videos\|
	32	api\.arte\.tv/api/player/v\d+/config/(?P<lang_2>%(langs)s)
	33	)
	34	/(?P<id>\d{6}-\d{3}-[AF])
	35	''' % {'langs': ArteTVBaseIE._ARTE_LANGUAGES}
	36	_TESTS = [{
	37	'url': 'https://www.arte.tv/en/videos/088501-000-A/mexico-stealing-petrol-to-survive/',
	38	'info_dict': {
	39	'id': '088501-000-A',
	40	'ext': 'mp4',
	41	'title': 'Mexico: Stealing Petrol to Survive',
	42	'upload_date': '20190628',
	43	},
	44	}, {
	45	'url': 'https://www.arte.tv/pl/videos/100103-000-A/usa-dyskryminacja-na-porodowce/',
	46	'only_matching': True,
	47	}, {
	48	'url': 'https://api.arte.tv/api/player/v2/config/de/100605-013-A',
	49	'only_matching': True,
	50	}]
	51
	52	def _real_extract(self, url):
5ad28e7f	53	mobj = self._match_valid_url(url)
8bdd16b4	54	video_id = mobj.group('id')
	55	lang = mobj.group('lang') or mobj.group('lang_2')
	56
	57	info = self._download_json(
	58	'%s/config/%s/%s' % (self._API_BASE, lang, video_id), video_id)
c40f5cf4 JMF	59	player_info = info['videoJsonPlayer']
c40f5cf4 JMF	60
8cc1840c	61	vsr = try_get(player_info, lambda x: x['VSR'], dict)
6348671c	62	if not vsr:
8cc1840c S	63	error = None
	64	if try_get(player_info, lambda x: x['custom_msg']['type']) == 'error':
	65	error = try_get(
	66	player_info, lambda x: x['custom_msg']['msg'], compat_str)
	67	if not error:
	68	error = 'Video %s is not available' % player_info.get('VID') or video_id
	69	raise ExtractorError(error, expected=True)
c0892b2b	70
99b67fec PH	71	upload_date_str = player_info.get('shootingDate')
99b67fec PH	72	if not upload_date_str:
8bbd3d14	73	upload_date_str = (player_info.get('VRA') or player_info.get('VDA') or '').split(' ')[0]
99b67fec	74
8bdd16b4	75	title = (player_info.get('VTI') or player_info['VID']).strip()
74214d35 S	76	subtitle = player_info.get('VSU', '').strip()
	77	if subtitle:
	78	title += ' - %s' % subtitle
	79
ff0f4cfe	80	qfunc = qualities(['MQ', 'HQ', 'EQ', 'SQ'])
c40f5cf4	81
08d65046 S	82	LANGS = {
	83	'fr': 'F',
	84	'de': 'A',
	85	'en': 'E[ANG]',
	86	'es': 'E[ESP]',
ff0f4cfe RA	87	'it': 'E[ITA]',
ff0f4cfe RA	88	'pl': 'E[POL]',
08d65046 S	89	}
08d65046 S	90
9c072d38 S	91	langcode = LANGS.get(lang, lang)
9c072d38 S	92
aff2f4f4	93	formats = []
c0892b2b	94	for format_id, format_dict in vsr.items():
aff2f4f4	95	f = dict(format_dict)
8bdd16b4	96	format_url = url_or_none(f.get('url'))
	97	streamer = f.get('streamer')
	98	if not format_url and not streamer:
	99	continue
aff2f4f4	100	versionCode = f.get('versionCode')
9c072d38 S	101	l = re.escape(langcode)
	102
	103	# Language preference from most to least priority
ff0f4cfe RA	104	# Reference: section 6.8 of
ff0f4cfe RA	105	# https://www.arte.tv/sites/en/corporate/files/complete-technical-guidelines-arte-geie-v1-07-1.pdf
9c072d38 S	106	PREFERENCES = (
	107	# original version in requested language, without subtitles
	108	r'VO{0}$'.format(l),
	109	# original version in requested language, with partial subtitles in requested language
	110	r'VO{0}-ST{0}$'.format(l),
	111	# original version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
	112	r'VO{0}-STM{0}$'.format(l),
	113	# non-original (dubbed) version in requested language, without subtitles
	114	r'V{0}$'.format(l),
	115	# non-original (dubbed) version in requested language, with subtitles partial subtitles in requested language
	116	r'V{0}-ST{0}$'.format(l),
	117	# non-original (dubbed) version in requested language, with subtitles for the deaf and hard-of-hearing in requested language
	118	r'V{0}-STM{0}$'.format(l),
	119	# original version in requested language, with partial subtitles in different language
	120	r'VO{0}-ST(?!{0}).+?$'.format(l),
	121	# original version in requested language, with subtitles for the deaf and hard-of-hearing in different language
	122	r'VO{0}-STM(?!{0}).+?$'.format(l),
	123	# original version in different language, with partial subtitles in requested language
	124	r'VO(?:(?!{0}).+?)?-ST{0}$'.format(l),
	125	# original version in different language, with subtitles for the deaf and hard-of-hearing in requested language
	126	r'VO(?:(?!{0}).+?)?-STM{0}$'.format(l),
	127	# original version in different language, without subtitles
	128	r'VO(?:(?!{0}))?$'.format(l),
	129	# original version in different language, with partial subtitles in different language
	130	r'VO(?:(?!{0}).+?)?-ST(?!{0}).+?$'.format(l),
	131	# original version in different language, with subtitles for the deaf and hard-of-hearing in different language
	132	r'VO(?:(?!{0}).+?)?-STM(?!{0}).+?$'.format(l),
	133	)
	134
	135	for pref, p in enumerate(PREFERENCES):
	136	if re.match(p, versionCode):
	137	lang_pref = len(PREFERENCES) - pref
	138	break
	139	else:
	140	lang_pref = -1
	141
8bdd16b4	142	media_type = f.get('mediaType')
	143	if media_type == 'hls':
	144	m3u8_formats = self._extract_m3u8_formats(
	145	format_url, video_id, 'mp4', entry_protocol='m3u8_native',
	146	m3u8_id=format_id, fatal=False)
	147	for m3u8_format in m3u8_formats:
	148	m3u8_format['language_preference'] = lang_pref
	149	formats.extend(m3u8_formats)
	150	continue
	151
aff2f4f4 PH	152	format = {
aff2f4f4 PH	153	'format_id': format_id,
aff2f4f4 PH	154	'language_preference': lang_pref,
	155	'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
	156	'width': int_or_none(f.get('width')),
	157	'height': int_or_none(f.get('height')),
	158	'tbr': int_or_none(f.get('bitrate')),
1b7b1d6e	159	'quality': qfunc(f.get('quality')),
c40f5cf4	160	}
aff2f4f4	161
8bdd16b4	162	if media_type == 'rtmp':
aff2f4f4 PH	163	format['url'] = f['streamer']
	164	format['play_path'] = 'mp4:' + f['url']
	165	format['ext'] = 'flv'
c40f5cf4	166	else:
aff2f4f4 PH	167	format['url'] = f['url']
	168
	169	formats.append(format)
	170
dca3ff4a	171	# For this extractor, quality only represents the relative quality
	172	# with respect to other formats with the same resolution
	173	self._sort_formats(formats, ('res', 'quality'))
c40f5cf4	174
8bdd16b4	175	return {
	176	'id': player_info.get('VID') or video_id,
	177	'title': title,
e27cc5d8	178	'description': player_info.get('VDE') or player_info.get('V7T'),
8bdd16b4	179	'upload_date': unified_strdate(upload_date_str),
	180	'thumbnail': player_info.get('programImage') or player_info.get('VTU', {}).get('IUR'),
	181	'formats': formats,
	182	}
c40f5cf4	183
24114fee	184
8bdd16b4	185	class ArteTVEmbedIE(InfoExtractor):
8bdd16b4	186	_VALID_URL = r'https?://(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+'
9c54ae33	187	_TESTS = [{
8bdd16b4	188	'url': 'https://www.arte.tv/player/v5/index.php?json_url=https%3A%2F%2Fapi.arte.tv%2Fapi%2Fplayer%2Fv2%2Fconfig%2Fde%2F100605-013-A&lang=de&autoplay=true&mute=0100605-013-A',
9c54ae33	189	'info_dict': {
8bdd16b4	190	'id': '100605-013-A',
9c54ae33	191	'ext': 'mp4',
8bdd16b4	192	'title': 'United we Stream November Lockdown Edition #13',
	193	'description': 'md5:be40b667f45189632b78c1425c7c2ce1',
	194	'upload_date': '20201116',
69a0c470	195	},
8bdd16b4	196	}, {
	197	'url': 'https://www.arte.tv/player/v3/index.php?json_url=https://api.arte.tv/api/player/v2/config/de/100605-013-A',
	198	'only_matching': True,
9c54ae33	199	}]
56a8ab7d	200
8bdd16b4	201	@staticmethod
	202	def _extract_urls(webpage):
	203	return [url for _, url in re.findall(
	204	r'<(?:iframe\|script)[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?arte\.tv/player/v\d+/index\.php\?.*?\bjson_url=.+?)\1',
	205	webpage)]
6e6b9f60	206
893f8832	207	def _real_extract(self, url):
4dfbf869	208	qs = parse_qs(url)
8bdd16b4	209	json_url = qs['json_url'][0]
	210	video_id = ArteTVIE._match_id(json_url)
	211	return self.url_result(
	212	json_url, ie=ArteTVIE.ie_key(), video_id=video_id)
4b492e35 S	213
4b492e35 S	214
6e6b9f60	215	class ArteTVPlaylistIE(ArteTVBaseIE):
8bdd16b4	216	_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>RC-\d{6})' % ArteTVBaseIE._ARTE_LANGUAGES
6e6b9f60	217	_TESTS = [{
ff0f4cfe	218	'url': 'https://www.arte.tv/en/videos/RC-016954/earn-a-living/',
6e6b9f60	219	'info_dict': {
ff0f4cfe RA	220	'id': 'RC-016954',
	221	'title': 'Earn a Living',
	222	'description': 'md5:d322c55011514b3a7241f7fb80d494c2',
6e6b9f60 S	223	},
6e6b9f60 S	224	'playlist_mincount': 6,
8bdd16b4	225	}, {
	226	'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/',
	227	'only_matching': True,
6e6b9f60 S	228	}]
	229
	230	def _real_extract(self, url):
5ad28e7f	231	lang, playlist_id = self._match_valid_url(url).groups()
6e6b9f60	232	collection = self._download_json(
8bdd16b4	233	'%s/collectionData/%s/%s?source=videos'
	234	% (self._API_BASE, lang, playlist_id), playlist_id)
	235	entries = []
	236	for video in collection['videos']:
	237	if not isinstance(video, dict):
	238	continue
	239	video_url = url_or_none(video.get('url')) or url_or_none(video.get('jsonUrl'))
	240	if not video_url:
	241	continue
	242	video_id = video.get('programId')
	243	entries.append({
	244	'_type': 'url_transparent',
	245	'url': video_url,
	246	'id': video_id,
	247	'title': video.get('title'),
	248	'alt_title': video.get('subtitle'),
	249	'thumbnail': url_or_none(try_get(video, lambda x: x['mainImage']['url'], compat_str)),
	250	'duration': int_or_none(video.get('durationSeconds')),
	251	'view_count': int_or_none(video.get('views')),
	252	'ie_key': ArteTVIE.ie_key(),
	253	})
6e6b9f60 S	254	title = collection.get('title')
6e6b9f60 S	255	description = collection.get('shortDescription') or collection.get('teaserText')
6e6b9f60	256	return self.playlist_result(entries, playlist_id, title, description)
50e93e03	257
	258
	259	class ArteTVCategoryIE(ArteTVBaseIE):
	260	_VALID_URL = r'https?://(?:www\.)?arte\.tv/(?P<lang>%s)/videos/(?P<id>[\w-]+(?:/[\w-]+))/?\s$' % ArteTVBaseIE._ARTE_LANGUAGES
	261	_TESTS = [{
	262	'url': 'https://www.arte.tv/en/videos/politics-and-society/',
	263	'info_dict': {
	264	'id': 'politics-and-society',
	265	'title': 'Politics and society',
	266	'description': 'Investigative documentary series, geopolitical analysis, and international commentary',
	267	},
	268	'playlist_mincount': 13,
	269	},
	270	]
	271
	272	@classmethod
	273	def suitable(cls, url):
	274	return (
	275	not any(ie.suitable(url) for ie in (ArteTVIE, ArteTVPlaylistIE, ))
	276	and super(ArteTVCategoryIE, cls).suitable(url))
	277
	278	def _real_extract(self, url):
	279	lang, playlist_id = self._match_valid_url(url).groups()
	280	webpage = self._download_webpage(url, playlist_id)
	281
	282	items = []
	283	for video in re.finditer(
	284	r'<a\b[^>]?href\s=\s*(?P<q>"\|\'\|\b)(?P<url>https?://www\.arte\.tv/%s/videos/[\w/-]+)(?P=q)' % lang,
	285	webpage):
	286	video = video.group('url')
	287	if video == url:
	288	continue
	289	if any(ie.suitable(video) for ie in (ArteTVIE, ArteTVPlaylistIE, )):
	290	items.append(video)
	291
	292	title = (self._og_search_title(webpage, default=None)
	293	or self._html_search_regex(r'<title\b[^>]*>([^<]+)</title>', default=None))
	294	title = strip_or_none(title.rsplit('\|', 1)[0]) or self._generic_title(url)
	295
	296	return self.playlist_from_matches(items, playlist_id=playlist_id, playlist_title=title,
	297	description=self._og_search_description(webpage, default=None))