[yt-dlp.git] / youtube_dlc / extractor / closertotruth.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor


class CloserToTruthIE(InfoExtractor):
    # Extractor for closertotruth.com pages. The pages reference Kaltura
    # media, so every result built here is a 'url_transparent' entry that
    # points at a 'kaltura:<partner_id>:<entry_id>' URL with ie_key 'Kaltura'.

    # The last non-empty path segment (without query/fragment) is the id.
    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
        'info_dict': {
            'id': '0_zof1ktre',
            'display_id': 'solutions-the-mind-body-problem',
            'ext': 'mov',
            'title': 'Solutions to the Mind-Body Problem?',
            'upload_date': '20140221',
            'timestamp': 1392956007,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
        'info_dict': {
            'id': '0_iuxai6g6',
            'display_id': 'how-do-brains-work',
            'ext': 'mov',
            'title': 'How do Brains Work?',
            'upload_date': '20140221',
            'timestamp': 1392956024,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/interviews/1725',
        'info_dict': {
            'id': '1725',
            'title': 'AyaFr-002',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Turn a closertotruth.com page into a Kaltura-backed result.

        The page is downloaded once. If it contains a
        ``<select id="select-version">`` element with usable ``<option>``
        values, a playlist with one entry per distinct option value is
        returned (playlist id is the display id, playlist title is the page
        title). Otherwise a single entry is built from the ``data-kaltura``
        attribute of the ``embed-kaltura`` anchor.

        The partner id, page title and single-video entry id lookups pass no
        ``default`` to ``_search_regex`` (unlike the optional select lookup);
        presumably a miss there aborts extraction -- confirm against
        ``InfoExtractor._search_regex``.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # Partner id is taken from the first <script src=...> whose URL
        # contains a 'partner_id/<digits>' or 'p/<digits>' path component.
        partner_id = self._search_regex(
            r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
            webpage, 'kaltura partner_id')

        # Keep only the part of <title> before the last-resort ' | suffix'.
        title = self._search_regex(
            r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')

        select = self._search_regex(
            r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
            webpage, 'select version', default=None)
        if select:
            entry_ids = set()
            entries = []
            # Scan only the captured markup of the version selector. Scanning
            # the whole page would also pick up <option> values belonging to
            # unrelated <select> elements and turn them into bogus entries.
            for mobj in re.finditer(
                    r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
                    select):
                entry_id = mobj.group('id')
                # The same entry id may be listed more than once (the value
                # may carry a '#...' suffix that is ignored); keep the first.
                if entry_id in entry_ids:
                    continue
                entry_ids.add(entry_id)
                entries.append({
                    '_type': 'url_transparent',
                    'url': 'kaltura:%s:%s' % (partner_id, entry_id),
                    'ie_key': 'Kaltura',
                    'title': mobj.group('title'),
                })
            # An empty selector falls through to the single-video path below.
            if entries:
                return self.playlist_result(entries, display_id, title)

        entry_id = self._search_regex(
            r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
            webpage, 'kaltura entry_id', group='id')

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': 'Kaltura',
            'title': title
        }
Commit	Line	Data
41c10233 SG	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
cb23192b S	4	import re
cb23192b S	5
41c10233 SG	6	from .common import InfoExtractor
	7
	8
	9	class CloserToTruthIE(InfoExtractor):
cb23192b S	10	_VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
	11	_TESTS = [{
	12	'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
	13	'info_dict': {
	14	'id': '0_zof1ktre',
	15	'display_id': 'solutions-the-mind-body-problem',
	16	'ext': 'mov',
	17	'title': 'Solutions to the Mind-Body Problem?',
	18	'upload_date': '20140221',
	19	'timestamp': 1392956007,
	20	'uploader_id': 'CTTXML'
	21	},
	22	'params': {
	23	'skip_download': True,
	24	},
	25	}, {
	26	'url': 'http://closertotruth.com/episodes/how-do-brains-work',
	27	'info_dict': {
	28	'id': '0_iuxai6g6',
	29	'display_id': 'how-do-brains-work',
	30	'ext': 'mov',
	31	'title': 'How do Brains Work?',
	32	'upload_date': '20140221',
	33	'timestamp': 1392956024,
	34	'uploader_id': 'CTTXML'
41c10233	35	},
cb23192b S	36	'params': {
cb23192b S	37	'skip_download': True,
41c10233	38	},
cb23192b S	39	}, {
	40	'url': 'http://closertotruth.com/interviews/1725',
	41	'info_dict': {
	42	'id': '1725',
	43	'title': 'AyaFr-002',
41c10233	44	},
cb23192b S	45	'playlist_mincount': 2,
cb23192b S	46	}]
41c10233 SG	47
41c10233 SG	48	def _real_extract(self, url):
cb23192b	49	display_id = self._match_id(url)
41c10233	50
cb23192b	51	webpage = self._download_webpage(url, display_id)
41c10233	52
cb23192b S	53	partner_id = self._search_regex(
	54	r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
	55	webpage, 'kaltura partner_id')
41c10233	56
cb23192b S	57	title = self._search_regex(
cb23192b S	58	r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')
41c10233	59
cb23192b S	60	select = self._search_regex(
	61	r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
	62	webpage, 'select version', default=None)
	63	if select:
	64	entry_ids = set()
	65	entries = []
	66	for mobj in re.finditer(
	67	r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
	68	webpage):
	69	entry_id = mobj.group('id')
	70	if entry_id in entry_ids:
	71	continue
	72	entry_ids.add(entry_id)
	73	entries.append({
	74	'_type': 'url_transparent',
	75	'url': 'kaltura:%s:%s' % (partner_id, entry_id),
	76	'ie_key': 'Kaltura',
	77	'title': mobj.group('title'),
	78	})
	79	if entries:
	80	return self.playlist_result(entries, display_id, title)
41c10233	81
cb23192b S	82	entry_id = self._search_regex(
	83	r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
	84	webpage, 'kaltura entry_id', group='id')
41c10233 SG	85
	86	return {
	87	'_type': 'url_transparent',
cb23192b S	88	'display_id': display_id,
cb23192b S	89	'url': 'kaltura:%s:%s' % (partner_id, entry_id),
41c10233	90	'ie_key': 'Kaltura',
cb23192b	91	'title': title
41c10233	92	}