]>
Commit | Line | Data |
---|---|---|
c990bb36 | 1 | from .common import InfoExtractor |
62651c55 PH |
2 | from ..utils import ( |
3 | find_xpath_attr, | |
4 | int_or_none, | |
5 | js_to_json, | |
6 | unescapeHTML, | |
4ecc1fc6 | 7 | determine_ext, |
62651c55 | 8 | ) |
c990bb36 S |
9 | |
10 | ||
class HowStuffWorksIE(InfoExtractor):
    """Extractor for ``*-video.htm`` pages matched by ``_VALID_URL``.

    The page is expected to embed a JavaScript ``var clip = {...};`` object.
    Formats are collected from its ``m3u8``, ``flv_url`` and ``mp4`` entries;
    when none of those yields a format, a SMIL manifest is downloaded and
    used as a fallback source of formats.
    """

    _VALID_URL = r'https?://[\da-z-]+\.(?:howstuffworks|stuff(?:(?:youshould|theydontwantyouto)know|toblowyourmind|momnevertoldyou)|(?:brain|car)stuffshow|fwthinking|geniusstuff)\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
    _TESTS = [
        {
            'url': 'http://www.stufftoblowyourmind.com/videos/optical-illusions-video.htm',
            'md5': '76646a5acc0c92bf7cd66751ca5db94d',
            'info_dict': {
                'id': '855410',
                'ext': 'mp4',
                'title': 'Your Trickster Brain: Optical Illusions -- Science on the Web',
                'description': 'md5:e374ff9561f6833ad076a8cc0a5ab2fb',
            },
        },
        {
            'url': 'http://shows.howstuffworks.com/more-shows/why-does-balloon-stick-to-hair-video.htm',
            'only_matching': True,
        }
    ]

    def _real_extract(self, url):
        """Return the info dict for the video page at *url*.

        The dict carries ``id``, ``display_id``, ``title``, ``description``,
        ``thumbnail``, ``duration`` and ``formats``. A missing ``content_id``
        or ``clip_title`` in the clip object raises ``KeyError``, as before.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        clip_js = self._search_regex(
            r'(?s)var clip = ({.*?});', webpage, 'clip info')
        clip_info = self._parse_json(
            clip_js, display_id, transform_source=js_to_json)

        video_id = clip_info['content_id']
        formats = []

        m3u8_url = clip_info.get('m3u8')
        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))

        flv_url = clip_info.get('flv_url')
        if flv_url:
            formats.append({
                'url': flv_url,
                'format_id': 'flv',
            })

        # ``or []`` also covers an explicit null value, which
        # ``.get('mp4', [])`` would hand straight to the loop.
        for video in clip_info.get('mp4') or []:
            src = video.get('src')
            if not src:
                # An entry without a URL cannot become a usable format.
                continue
            bitrate = video.get('bitrate')
            if bitrate is None:
                formats.append({
                    'url': src,
                    'format_id': 'mp4',
                })
                continue
            formats.append({
                'url': src,
                'format_id': 'mp4-%s' % bitrate,
                # str() keeps a non-string bitrate from breaking rstrip().
                'vbr': int_or_none(str(bitrate).rstrip('k')),
            })

        if not formats:
            smil = self._download_xml(
                'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
                video_id, 'Downloading video SMIL')

            smil_ns = '{http://www.w3.org/2001/SMIL20/Language}'

            # find_xpath_attr may not find the node; guard before reading
            # its attribute instead of failing with AttributeError.
            base_node = find_xpath_attr(
                smil,
                './{0}head/{0}meta'.format(smil_ns),
                'name',
                'httpBase')
            http_base = base_node.get('content') if base_node is not None else None

            # Query string appended to every SMIL media URL.
            # NOTE(review): presumably mimics the player's request
            # parameters -- confirm before changing.
            URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'

            # Without a base URL no valid media URL can be built, so the
            # SMIL entries are skipped rather than emitted as 'None/...'.
            if http_base:
                for video in smil.findall(
                        './{0}body/{0}switch/{0}video'.format(smil_ns)):
                    src = video.get('src')
                    if not src:
                        continue
                    vbr = int_or_none(video.get('system-bitrate'), scale=1000)
                    smil_format = {
                        'url': '%s/%s%s' % (http_base, src, URL_SUFFIX),
                        'vbr': vbr,
                    }
                    # '%dk' % None raises TypeError; only label the format
                    # when a bitrate was actually parsed.
                    if vbr is not None:
                        smil_format['format_id'] = '%dk' % vbr
                    formats.append(smil_format)

        return {
            'id': '%s' % video_id,
            'display_id': display_id,
            'title': unescapeHTML(clip_info['clip_title']),
            'description': unescapeHTML(clip_info.get('caption')),
            'thumbnail': clip_info.get('video_still_url'),
            'duration': int_or_none(clip_info.get('duration')),
            'formats': formats,
        }