]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/screencast.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / screencast.py
import urllib.parse
import urllib.request

from .common import InfoExtractor
from ..compat import compat_parse_qs
from ..utils import ExtractorError
6
7
class ScreencastIE(InfoExtractor):
    """Extractor for screencast.com ``/t/<id>`` share pages.

    The video URL is looked up in several places of the downloaded page, in
    order: the ``<embed name="Video">`` tag, the ``flashVars``/``initParams``
    player parameters, the ``src=`` part of the ``og:video`` meta tag, the
    ``MediaContentUrl`` value and finally the raw ``og:video`` meta content.
    """

    _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
        'info_dict': {
            'id': '3ZEjQXlT',
            'ext': 'm4v',
            'title': 'Color Measurement with Ocean Optics Spectrometers',
            'description': 'md5:240369cde69d8bed61349a199c5fb153',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
        'info_dict': {
            'id': 'V2uXehPJa1ZI',
            'ext': 'mov',
            'title': 'The Amadeus Spectrometer',
            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/aAB3iowa',
        'md5': 'dedb2734ed00c9755761ccaee88527cd',
        'info_dict': {
            'id': 'aAB3iowa',
            'ext': 'mp4',
            'title': 'Google Earth Export',
            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://www.screencast.com/t/X3ddTrYh',
        'md5': '669ee55ff9c51988b4ebc0877cc8b159',
        'info_dict': {
            'id': 'X3ddTrYh',
            'ext': 'wmv',
            'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        }
    }, {
        'url': 'http://screencast.com/t/aAB3iowa',
        'only_matching': True,
    }]

    def _extract_player_param_url(self, webpage):
        """Return the video URL found in the player ``<param>`` tags, or None.

        ``flashVars`` is tried first.  ``initParams`` is the fallback; its
        commas are turned into ``&`` so it can be parsed as a query string.
        None is returned when neither tag is present or when the parsed
        parameters carry no ``content`` entry, so the caller can move on to
        its other lookups instead of failing with a ``KeyError``.
        """
        flash_vars_s = self._html_search_regex(
            r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
            default=None)
        if not flash_vars_s:
            flash_vars_s = self._html_search_regex(
                r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
                default=None)
            if flash_vars_s:
                flash_vars_s = flash_vars_s.replace(',', '&')
        if not flash_vars_s:
            return None

        content = compat_parse_qs(flash_vars_s).get('content')
        if not content:
            return None

        video_url_raw = urllib.parse.quote(content[0])
        # quote() also escapes the colon after the scheme; put it back for
        # both plain and TLS URLs (previously only "http" was restored, which
        # left "https%3A//..." URLs unusable).
        return video_url_raw.replace(
            'http%3A', 'http:').replace('https%3A', 'https:')

    def _real_extract(self, url):
        """Download the share page for *url* and build the info dict.

        Raises ExtractorError when none of the lookups yields a video URL.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'<embed name="Video".*?src="([^"]+)"', webpage,
            'QuickTime embed', default=None)

        if video_url is None:
            video_url = self._extract_player_param_url(webpage)

        if video_url is None:
            # og:video may wrap the real media URL in a "src=" parameter
            video_meta = self._html_search_meta(
                'og:video', webpage, default=None)
            if video_meta:
                video_url = self._search_regex(
                    r'src=(.*?)(?:$|&)', video_meta,
                    'meta tag video URL', default=None)

        if video_url is None:
            video_url = self._html_search_regex(
                r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage, 'video url', default=None, group='url')

        if video_url is None:
            # Last resort: use the og:video content as-is
            video_url = self._html_search_meta(
                'og:video', webpage, default=None)

        if video_url is None:
            raise ExtractorError('Cannot find video')

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                [r'<b>Title:</b> ([^<]+)</div>',
                 r'class="tabSeperator">></span><span class="tabText">(.+?)<',
                 r'<title>([^<]+)</title>'],
                webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
        if description is None:
            description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }