]>
Commit | Line | Data |
---|---|---|
1 | from .common import InfoExtractor | |
2 | from ..utils import ( | |
3 | find_xpath_attr, | |
4 | int_or_none, | |
5 | js_to_json, | |
6 | unescapeHTML, | |
7 | determine_ext, | |
8 | ) | |
9 | ||
10 | ||
class HowStuffWorksIE(InfoExtractor):
    """Extractor for ``*-video.htm`` pages on the hosts matched by ``_VALID_URL``.

    The page embeds a JavaScript ``var clip = {...};`` object describing the
    video. Formats are collected from its ``m3u8``, ``flv_url`` and ``mp4``
    entries; when none of those produce a format, a SMIL document is
    downloaded and used as a fallback source.
    """

    _VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
    _TESTS = [
        {
            'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
            'md5': '76646a5acc0c92bf7cd66751ca5db94d',
            'info_dict': {
                'id': '855410',
                'ext': 'mp4',
                'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
                'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
            },
        },
        {
            'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Returns a dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``duration`` and ``formats``.

        Raises ``KeyError`` if the clip object lacks ``content_id`` or
        ``clip_title``; errors from the download/parse helpers propagate.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        clip_js = self._search_regex(
            r'(?s)var clip = ({.*?});', webpage, 'clip info')
        # The clip object is a JavaScript literal, so it is converted with
        # js_to_json before being parsed.
        clip_info = self._parse_json(
            clip_js, display_id, transform_source=js_to_json)

        video_id = clip_info['content_id']
        formats = []

        m3u8_url = clip_info.get('m3u8')
        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))

        flv_url = clip_info.get('flv_url')
        if flv_url:
            formats.append({
                'url': flv_url,
                'format_id': 'flv',
            })

        # ``or []`` also covers an explicit null, which ``.get('mp4', [])``
        # would pass through as None and fail to iterate.
        for video in clip_info.get('mp4') or []:
            src = video.get('src')
            if not src:
                # An entry without a URL cannot be downloaded.
                continue
            bitrate = video.get('bitrate')
            if bitrate is None:
                format_id, vbr = 'mp4', None
            else:
                format_id = 'mp4-%s' % bitrate
                # Bitrates look like '500k'; go through '%s' so a bare
                # number does not break on .rstrip().
                vbr = int_or_none(('%s' % bitrate).rstrip('k'))
            formats.append({
                'url': src,
                'format_id': format_id,
                'vbr': vbr,
            })

        if not formats:
            # Fallback: nothing usable in the clip object, so ask the SMIL
            # service for the list of renditions.
            smil = self._download_xml(
                'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
                video_id, 'Downloading video SMIL')

            smil_ns = '{http://www.w3.org/2001/SMIL20/Language}'

            http_base_meta = find_xpath_attr(
                smil,
                './{0}head/{0}meta'.format(smil_ns),
                'name',
                'httpBase')
            # Compare against None explicitly: an Element without children
            # is falsy even though it exists.
            http_base = (
                http_base_meta.get('content')
                if http_base_meta is not None else None)

            # Query string appended to every SMIL-derived URL.
            # NOTE(review): presumably mimics a Flash player request - confirm.
            URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'

            # Without a base URL no valid media URL can be built, so the
            # SMIL entries are skipped and ``formats`` stays empty.
            if http_base:
                for video in smil.findall(
                        './{0}body/{0}switch/{0}video'.format(smil_ns)):
                    src = video.attrib.get('src')
                    if not src:
                        continue
                    vbr = int_or_none(
                        video.attrib.get('system-bitrate'), scale=1000)
                    formats.append({
                        'url': '%s/%s%s' % (http_base, src, URL_SUFFIX),
                        # '%d' cannot format None; leave format_id unset
                        # when the bitrate is missing or unparsable.
                        'format_id': '%dk' % vbr if vbr is not None else None,
                        'vbr': vbr,
                    })

        return {
            'id': '%s' % video_id,
            'display_id': display_id,
            'title': unescapeHTML(clip_info['clip_title']),
            'description': unescapeHTML(clip_info.get('caption')),
            'thumbnail': clip_info.get('video_still_url'),
            'duration': int_or_none(clip_info.get('duration')),
            'formats': formats,
        }