]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/camtasia.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / camtasia.py
CommitLineData
5fff2e57 1import os
2import urllib.parse
3
4from .common import InfoExtractor
5from ..utils import float_or_none
6
7
class CamtasiaEmbedIE(InfoExtractor):
    """Extract the media files of a Camtasia presentation embedded in a webpage.

    The embed is recognised by a ``fo.addVariable("csConfigFile", "...")``
    call in the page source; the referenced XML configuration lists the
    individual media files, which are returned together as one playlist.
    """

    # No URL pattern of its own: extraction is driven purely from webpage
    # content through _extract_from_webpage().
    # NOTE(review): presumably invoked by the generic embed machinery of
    # InfoExtractor - confirm against the base class.
    _VALID_URL = False
    _WEBPAGE_TESTS = [
        {
            'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/',
            'playlist': [{
                'md5': '0c5e352edabf715d762b0ad4e6d9ee67',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - video1',
                    'ext': 'flv',
                    'duration': 2235.90,
                },
            }, {
                'md5': '10e4bb3aaca9fd630e273ff92d9f3c63',
                'info_dict': {
                    'id': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final_PIP',
                    'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final - pip',
                    'ext': 'flv',
                    'duration': 2235.93,
                },
            }],
            'info_dict': {
                'title': 'Fenn-AA_PA_Radar_Course_Lecture_1c_Final',
            },
            'skip': 'webpage dead',
        },
    ]

    def _extract_from_webpage(self, url, webpage):
        """Build a playlist from the Camtasia configuration referenced by *webpage*.

        Args:
            url: URL of the page; used to resolve the (possibly relative)
                configuration and media locations.
            webpage: HTML source of the page.

        Returns:
            ``None`` when the page contains no ``csConfigFile`` variable,
            otherwise a playlist dict with ``_type``, ``entries`` and ``title``.
            ``entries`` holds one dict per file node that has a non-empty
            ``<uri>``; it is empty when the configuration has no
            ``playlist/array/fileset`` element.
        """
        config_path = self._search_regex(
            r'fo\.addVariable\(\s*"csConfigFile",\s*"([^"]+)"\s*\);',
            webpage, 'camtasia configuration file', default=None)
        if config_path is None:
            return None

        title = self._html_search_meta('DC.title', webpage, fatal=True)

        config_xml = self._download_xml(
            urllib.parse.urljoin(url, config_path), self._generic_id(url),
            note='Downloading camtasia configuration',
            errnote='Failed to download camtasia configuration')
        fileset_node = config_xml.find('./playlist/array/fileset')

        entries = []
        # Iterate the element directly: Element.getchildren() was removed in
        # Python 3.9. A configuration without a fileset yields no entries
        # instead of crashing on None.
        for n in (fileset_node if fileset_node is not None else ()):
            # findtext() returns None for a missing child, so nodes without
            # a usable <uri> are skipped rather than raising.
            uri = n.findtext('./uri')
            if not uri:
                continue

            entries.append({
                # File name of the media URI without directory and extension
                'id': os.path.splitext(uri.rpartition('/')[2])[0],
                'title': f'{title} - {n.tag}',
                'url': urllib.parse.urljoin(url, uri),
                # A missing or empty <duration> is passed on as None/''
                # for float_or_none to handle.
                'duration': float_or_none(n.findtext('./duration')),
            })

        return {
            '_type': 'playlist',
            'entries': entries,
            'title': title,
        }