]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/closertotruth.py
[cleanup] Use `_html_extract_title`
[yt-dlp.git] / yt_dlp / extractor / closertotruth.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7
8
class CloserToTruthIE(InfoExtractor):
    """Extractor for closertotruth.com pages that embed Kaltura media.

    A page yields either a playlist (when it carries a
    ``<select id="select-version">`` element listing several entries) or a
    single video (taken from the ``embed-kaltura`` anchor). In both cases the
    actual media extraction is delegated to the Kaltura extractor through
    ``url_transparent`` results.
    """

    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
        'info_dict': {
            'id': '0_zof1ktre',
            'display_id': 'solutions-the-mind-body-problem',
            'ext': 'mov',
            'title': 'Solutions to the Mind-Body Problem?',
            'upload_date': '20140221',
            'timestamp': 1392956007,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
        'info_dict': {
            'id': '0_iuxai6g6',
            'display_id': 'how-do-brains-work',
            'ext': 'mov',
            'title': 'How do Brains Work?',
            'upload_date': '20140221',
            'timestamp': 1392956024,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/interviews/1725',
        'info_dict': {
            'id': '1725',
            'title': 'AyaFr-002',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Build a Kaltura-backed result for the page at *url*.

        Returns a playlist result when the page has a "select-version"
        drop-down with at least one usable ``<option>``; otherwise returns a
        single ``url_transparent`` dict pointing at the Kaltura entry named by
        the ``embed-kaltura`` anchor.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The Kaltura partner id is read from the URL of an embedded <script>
        # (either ".../partner_id/<n>" or ".../p/<n>").
        partner_id = self._search_regex(
            r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
            webpage, 'kaltura partner_id')

        title = self._html_extract_title(webpage, 'video title')

        # Optional drop-down listing several versions/parts of the media.
        select = self._search_regex(
            r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
            webpage, 'select version', default=None)
        if select:
            entry_ids = set()
            entries = []
            # Scan only the markup captured from the version selector, so that
            # <option> tags belonging to unrelated <select> elements elsewhere
            # on the page are never mistaken for Kaltura entries.
            for mobj in re.finditer(
                    r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
                    select):
                entry_id = mobj.group('id')
                # The same entry id may be listed more than once (the value
                # can carry a "#..." suffix); keep the first occurrence only.
                if entry_id in entry_ids:
                    continue
                entry_ids.add(entry_id)
                entries.append({
                    '_type': 'url_transparent',
                    'url': 'kaltura:%s:%s' % (partner_id, entry_id),
                    'ie_key': 'Kaltura',
                    'title': mobj.group('title'),
                })
            # An empty selector falls through to the single-video path below.
            if entries:
                return self.playlist_result(entries, display_id, title)

        # Single-video page: the entry id is stored on the embed anchor.
        entry_id = self._search_regex(
            r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
            webpage, 'kaltura entry_id', group='id')

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': 'Kaltura',
            'title': title
        }