jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/extractor/closertotruth.py

... / ...

Commit	Line	Data
	1	import re
	2
	3	from .common import InfoExtractor
	4
	5
	6	class CloserToTruthIE(InfoExtractor):
	7	_WORKING = False
	8	_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
	9	_TESTS = [{
	10	'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
	11	'info_dict': {
	12	'id': '0_zof1ktre',
	13	'display_id': 'solutions-the-mind-body-problem',
	14	'ext': 'mov',
	15	'title': 'Solutions to the Mind-Body Problem?',
	16	'upload_date': '20140221',
	17	'timestamp': 1392956007,
	18	'uploader_id': 'CTTXML'
	19	},
	20	'params': {
	21	'skip_download': True,
	22	},
	23	}, {
	24	'url': 'http://closertotruth.com/episodes/how-do-brains-work',
	25	'info_dict': {
	26	'id': '0_iuxai6g6',
	27	'display_id': 'how-do-brains-work',
	28	'ext': 'mov',
	29	'title': 'How do Brains Work?',
	30	'upload_date': '20140221',
	31	'timestamp': 1392956024,
	32	'uploader_id': 'CTTXML'
	33	},
	34	'params': {
	35	'skip_download': True,
	36	},
	37	}, {
	38	'url': 'http://closertotruth.com/interviews/1725',
	39	'info_dict': {
	40	'id': '1725',
	41	'title': 'AyaFr-002',
	42	},
	43	'playlist_mincount': 2,
	44	}]
	45
	46	def _real_extract(self, url):
	47	display_id = self._match_id(url)
	48
	49	webpage = self._download_webpage(url, display_id)
	50
	51	partner_id = self._search_regex(
	52	r'<script[^>]+src=["\'].*?\b(?:partner_id\|p)/(\d+)',
	53	webpage, 'kaltura partner_id')
	54
	55	title = self._html_extract_title(webpage, 'video title')
	56
	57	select = self._search_regex(
	58	r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
	59	webpage, 'select version', default=None)
	60	if select:
	61	entry_ids = set()
	62	entries = []
	63	for mobj in re.finditer(
	64	r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
	65	webpage):
	66	entry_id = mobj.group('id')
	67	if entry_id in entry_ids:
	68	continue
	69	entry_ids.add(entry_id)
	70	entries.append({
	71	'_type': 'url_transparent',
	72	'url': 'kaltura:%s:%s' % (partner_id, entry_id),
	73	'ie_key': 'Kaltura',
	74	'title': mobj.group('title'),
	75	})
	76	if entries:
	77	return self.playlist_result(entries, display_id, title)
	78
	79	entry_id = self._search_regex(
	80	r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
	81	webpage, 'kaltura entry_id', group='id')
	82
	83	return {
	84	'_type': 'url_transparent',
	85	'display_id': display_id,
	86	'url': 'kaltura:%s:%s' % (partner_id, entry_id),
	87	'ie_key': 'Kaltura',
	88	'title': title
	89	}