[yt-dlp.git] / yt_dlp / extractor / nzz.py

import re

from .common import InfoExtractor
from ..utils import (
    extract_attributes,
)


class NZZIE(InfoExtractor):
    """Extractor for nzz.ch article pages that embed Kaltura players.

    The page is downloaded, every tag whose ``class`` attribute starts with
    ``kalturaPlayer`` is collected, and each supported player is delegated to
    the ``Kaltura`` extractor. The result is a playlist identified by the
    numeric article id taken from the URL.
    """

    # Article URLs end in "-ld.<digits>"; the digits are used as the page id.
    _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
        'info_dict': {
            'id': '9153',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
        'info_dict': {
            'id': '1368112',
        },
        'playlist_count': 1,
    }]

    def _real_extract(self, url):
        """Build a playlist of the Kaltura entries embedded in the article.

        Players whose ``data-type`` is not ``kaltura_singleArticle`` and
        players without a ``data-id`` are skipped with a warning, so one
        unusable embed does not abort extraction of the remaining ones.
        """
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        entries = []
        for player_element in re.findall(
                r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
            player_params = extract_attributes(player_element)
            if player_params.get('data-type') != 'kaltura_singleArticle':
                self.report_warning('Unsupported player type')
                continue
            # Previously a missing data-id raised a bare KeyError and aborted
            # the whole page; skip the broken player instead.
            entry_id = player_params.get('data-id')
            if not entry_id:
                self.report_warning('Skipping Kaltura player without data-id')
                continue
            # NOTE(review): 1750922 is presumably the site's Kaltura partner
            # id expected by the Kaltura extractor - confirm against KalturaIE.
            entries.append(self.url_result(
                'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))

        return self.playlist_result(entries, page_id)
Commit	Line	Data
33898fb1 RA	1	import re
	2
	3	from .common import InfoExtractor
	4	from ..utils import (
	5	extract_attributes,
	6	)
	7
	8
	9	class NZZIE(InfoExtractor):
	10	_VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
15ed5a27	11	_TESTS = [{
33898fb1 RA	12	'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
	13	'info_dict': {
	14	'id': '9153',
	15	},
	16	'playlist_mincount': 6,
15ed5a27 AS	17	}, {
	18	'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
	19	'info_dict': {
	20	'id': '1368112',
	21	},
	22	'playlist_count': 1,
	23	}]
33898fb1 RA	24
	25	def _real_extract(self, url):
	26	page_id = self._match_id(url)
	27	webpage = self._download_webpage(url, page_id)
	28
	29	entries = []
15ed5a27 AS	30	for player_element in re.findall(
15ed5a27 AS	31	r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
33898fb1 RA	32	player_params = extract_attributes(player_element)
	33	if player_params.get('data-type') not in ('kaltura_singleArticle',):
	34	self.report_warning('Unsupported player type')
	35	continue
	36	entry_id = player_params['data-id']
	37	entries.append(self.url_result(
	38	'kaltura:1750922:' + entry_id, 'Kaltura', entry_id))
	39
	40	return self.playlist_result(entries, page_id)