]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/screencast.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / screencast.py
CommitLineData
ac668111 1import urllib.request
2
38ad119f 3from .common import InfoExtractor
ac668111 4from ..compat import compat_parse_qs
5from ..utils import ExtractorError
38ad119f
PH
6
7
class ScreencastIE(InfoExtractor):
    """Extractor for ``screencast.com/t/<id>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
        'info_dict': {
            'id': '3ZEjQXlT',
            'ext': 'm4v',
            'title': 'Color Measurement with Ocean Optics Spectrometers',
            'description': 'md5:240369cde69d8bed61349a199c5fb153',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
        'info_dict': {
            'id': 'V2uXehPJa1ZI',
            'ext': 'mov',
            'title': 'The Amadeus Spectrometer',
            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://www.screencast.com/t/aAB3iowa',
        'md5': 'dedb2734ed00c9755761ccaee88527cd',
        'info_dict': {
            'id': 'aAB3iowa',
            'ext': 'mp4',
            'title': 'Google Earth Export',
            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://www.screencast.com/t/X3ddTrYh',
        'md5': '669ee55ff9c51988b4ebc0877cc8b159',
        'info_dict': {
            'id': 'X3ddTrYh',
            'ext': 'wmv',
            'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://screencast.com/t/aAB3iowa',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the media URL and basic metadata on a screencast.com page.

        The media URL is searched for in several places, in order, and the
        first hit wins:

        1. the ``src`` of an ``<embed name="Video">`` tag,
        2. the ``content`` entry of the ``flashVars`` (or comma-separated
           ``initParams``) ``<param>`` value,
        3. the ``src=`` parameter inside the ``og:video`` meta value,
        4. a ``MediaContentUrl`` string in the page,
        5. the raw ``og:video`` meta value.

        Returns a dict with ``id``, ``url``, ``title``, ``description`` and
        ``thumbnail``.

        Raises ExtractorError if none of the sources yields a URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'<embed name="Video".*?src="([^"]+)"', webpage,
            'QuickTime embed', default=None)

        if video_url is None:
            flash_vars_s = self._html_search_regex(
                r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
                default=None)
            if not flash_vars_s:
                flash_vars_s = self._html_search_regex(
                    r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
                    default=None)
                if flash_vars_s:
                    # initParams is comma-separated; turn it into a query
                    # string so it can be parsed like flashVars.
                    flash_vars_s = flash_vars_s.replace(',', '&')
            if flash_vars_s:
                flash_vars = compat_parse_qs(flash_vars_s)
                # A param without a 'content' entry must not abort the
                # extraction: fall through to the remaining fallbacks.
                content = flash_vars.get('content')
                if content:
                    video_url_raw = urllib.request.quote(content[0])
                    # quote() also escapes the ':' after the scheme; restore
                    # it for both http and https URLs.
                    video_url = video_url_raw.replace(
                        'http%3A', 'http:').replace('https%3A', 'https:')

        if video_url is None:
            video_meta = self._html_search_meta(
                'og:video', webpage, default=None)
            if video_meta:
                video_url = self._search_regex(
                    r'src=(.*?)(?:$|&)', video_meta,
                    'meta tag video URL', default=None)

        if video_url is None:
            video_url = self._html_search_regex(
                r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage, 'video url', default=None, group='url')

        if video_url is None:
            # Last resort: use the og:video value as-is.
            video_url = self._html_search_meta(
                'og:video', webpage, default=None)

        if video_url is None:
            raise ExtractorError('Cannot find video')

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                [r'<b>Title:</b> ([^<]+)</div>',
                 r'class="tabSeperator">></span><span class="tabText">(.+?)<',
                 r'<title>([^<]+)</title>'],
                webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
        if description is None:
            description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }