[yt-dlp.git] / yt_dlp / extractor / closertotruth.py

import re

from .common import InfoExtractor


class CloserToTruthIE(InfoExtractor):
    """Extractor for closertotruth.com pages that embed Kaltura media.

    The page itself is only scraped for the Kaltura partner id and entry
    id(s); the actual media extraction is delegated to the Kaltura extractor
    through ``url_transparent`` results.
    """

    # The extractor is flagged as currently not working.
    _WORKING = False
    # The last path component (without query/fragment) is used as display id.
    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
        'info_dict': {
            'id': '0_zof1ktre',
            'display_id': 'solutions-the-mind-body-problem',
            'ext': 'mov',
            'title': 'Solutions to the Mind-Body Problem?',
            'upload_date': '20140221',
            'timestamp': 1392956007,
            'uploader_id': 'CTTXML',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
        'info_dict': {
            'id': '0_iuxai6g6',
            'display_id': 'how-do-brains-work',
            'ext': 'mov',
            'title': 'How do Brains Work?',
            'upload_date': '20140221',
            'timestamp': 1392956024,
            'uploader_id': 'CTTXML',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/interviews/1725',
        'info_dict': {
            'id': '1725',
            'title': 'AyaFr-002',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Build a Kaltura-backed result for a closertotruth.com page.

        If the page contains a ``select-version`` drop-down with usable
        options, a playlist with one ``url_transparent`` entry per distinct
        option value is returned. Otherwise a single ``url_transparent``
        result is built from the ``embed-kaltura`` anchor.

        NOTE(review): the partner id and the single-video entry id lookups
        pass no ``default``, so they presumably fail the extraction when the
        markup is missing - confirm against ``InfoExtractor._search_regex``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The partner id is taken from the src of a <script> tag
        # (".../partner_id/<digits>" or ".../p/<digits>").
        partner_id = self._search_regex(
            r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
            webpage, 'kaltura partner_id')

        title = self._html_extract_title(webpage, 'video title')

        # Inner HTML of the version selector, or None when the page has none.
        select = self._search_regex(
            r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
            webpage, 'select version', default=None)
        if select:
            entry_ids = set()
            entries = []
            # Scan only the captured selector body, not the whole page, so
            # <option> elements of unrelated <select>s are never mistaken
            # for Kaltura entries.
            for mobj in re.finditer(
                    r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
                    select):
                entry_id = mobj.group('id')
                # Keep only the first option for every entry id.
                if entry_id in entry_ids:
                    continue
                entry_ids.add(entry_id)
                entries.append({
                    '_type': 'url_transparent',
                    'url': f'kaltura:{partner_id}:{entry_id}',
                    'ie_key': 'Kaltura',
                    'title': mobj.group('title'),
                })
            # With no usable option fall through to the single-video path.
            if entries:
                return self.playlist_result(entries, display_id, title)

        entry_id = self._search_regex(
            r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
            webpage, 'kaltura entry_id', group='id')

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': f'kaltura:{partner_id}:{entry_id}',
            'ie_key': 'Kaltura',
            'title': title,
        }
Commit	Line	Data
cb23192b S	1	import re
cb23192b S	2
41c10233 SG	3	from .common import InfoExtractor
	4
	5
	6	class CloserToTruthIE(InfoExtractor):
df773c3d	7	_WORKING = False
cb23192b S	8	_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
	9	_TESTS = [{
	10	'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
	11	'info_dict': {
	12	'id': '0_zof1ktre',
	13	'display_id': 'solutions-the-mind-body-problem',
	14	'ext': 'mov',
	15	'title': 'Solutions to the Mind-Body Problem?',
	16	'upload_date': '20140221',
	17	'timestamp': 1392956007,
add96eb9	18	'uploader_id': 'CTTXML',
cb23192b S	19	},
	20	'params': {
	21	'skip_download': True,
	22	},
	23	}, {
	24	'url': 'http://closertotruth.com/episodes/how-do-brains-work',
	25	'info_dict': {
	26	'id': '0_iuxai6g6',
	27	'display_id': 'how-do-brains-work',
	28	'ext': 'mov',
	29	'title': 'How do Brains Work?',
	30	'upload_date': '20140221',
	31	'timestamp': 1392956024,
add96eb9	32	'uploader_id': 'CTTXML',
41c10233	33	},
cb23192b S	34	'params': {
cb23192b S	35	'skip_download': True,
41c10233	36	},
cb23192b S	37	}, {
	38	'url': 'http://closertotruth.com/interviews/1725',
	39	'info_dict': {
	40	'id': '1725',
	41	'title': 'AyaFr-002',
41c10233	42	},
cb23192b S	43	'playlist_mincount': 2,
cb23192b S	44	}]
41c10233 SG	45
41c10233 SG	46	def _real_extract(self, url):
cb23192b	47	display_id = self._match_id(url)
41c10233	48
cb23192b	49	webpage = self._download_webpage(url, display_id)
41c10233	50
cb23192b S	51	partner_id = self._search_regex(
	52	r'<script[^>]+src=["\'].*?\b(?:partner_id\|p)/(\d+)',
	53	webpage, 'kaltura partner_id')
41c10233	54
04f3fd2c	55	title = self._html_extract_title(webpage, 'video title')
41c10233	56
cb23192b S	57	select = self._search_regex(
	58	r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
	59	webpage, 'select version', default=None)
	60	if select:
	61	entry_ids = set()
	62	entries = []
	63	for mobj in re.finditer(
	64	r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
	65	webpage):
	66	entry_id = mobj.group('id')
	67	if entry_id in entry_ids:
	68	continue
	69	entry_ids.add(entry_id)
	70	entries.append({
	71	'_type': 'url_transparent',
add96eb9	72	'url': f'kaltura:{partner_id}:{entry_id}',
cb23192b S	73	'ie_key': 'Kaltura',
	74	'title': mobj.group('title'),
	75	})
	76	if entries:
	77	return self.playlist_result(entries, display_id, title)
41c10233	78
cb23192b S	79	entry_id = self._search_regex(
	80	r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
	81	webpage, 'kaltura entry_id', group='id')
41c10233 SG	82
	83	return {
	84	'_type': 'url_transparent',
cb23192b	85	'display_id': display_id,
add96eb9	86	'url': f'kaltura:{partner_id}:{entry_id}',
41c10233	87	'ie_key': 'Kaltura',
add96eb9	88	'title': title,
41c10233	89	}