]>
Commit | Line | Data |
---|---|---|
c990bb36 S |
1 | from __future__ import unicode_literals |
2 | ||
c990bb36 | 3 | from .common import InfoExtractor |
62651c55 PH |
4 | from ..utils import ( |
5 | find_xpath_attr, | |
6 | int_or_none, | |
7 | js_to_json, | |
8 | unescapeHTML, | |
4ecc1fc6 | 9 | determine_ext, |
62651c55 | 10 | ) |
c990bb36 S |
11 | |
12 | ||
class HowStuffWorksIE(InfoExtractor):
    """Information extractor for video pages on ``*.howstuffworks.com``.

    Clip metadata is read from the ``var clip = {...};`` JavaScript object
    embedded in the page.  Formats are collected from the HLS, FLV and MP4
    entries of that object; when none of them yields a format, the site's
    SMIL service is queried instead.
    """

    _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm'
    _TESTS = [
        {
            'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm',
            'info_dict': {
                'id': '450221',
                'ext': 'flv',
                'title': 'Cool Jobs - Iditarod Musher',
                'description': 'Cold sleds, freezing temps and warm dog breath... an Iditarod musher\'s dream. Kasey-Dee Gardner jumps on a sled to find out what the big deal is.',
                'display_id': 'cool-jobs-iditarod-musher',
                'thumbnail': r're:^https?://.*\.jpg$',
                'duration': 161,
            },
            'skip': 'Video broken',
        },
        {
            'url': 'http://adventure.howstuffworks.com/7199-survival-zone-food-and-water-in-the-savanna-video.htm',
            'info_dict': {
                'id': '453464',
                'ext': 'mp4',
                'title': 'Survival Zone: Food and Water In the Savanna',
                'description': 'Learn how to find both food and water while trekking in the African savannah. In this video from the Discovery Channel.',
                'display_id': 'survival-zone-food-and-water-in-the-savanna',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://entertainment.howstuffworks.com/arts/2706-sword-swallowing-1-by-dan-meyer-video.htm',
            'info_dict': {
                'id': '440011',
                'ext': 'mp4',
                'title': 'Sword Swallowing #1 by Dan Meyer',
                'description': 'Video footage (1 of 3) used by permission of the owner Dan Meyer through Sword Swallowers Association International <www.swordswallow.org>',
                'display_id': 'sword-swallowing-1-by-dan-meyer',
                'thumbnail': r're:^https?://.*\.jpg$',
            },
        },
        {
            'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm',
            'only_matching': True,
        }
    ]

    # XML namespace of the SMIL document, in ElementTree "{uri}" notation.
    _SMIL_NS = '{http://www.w3.org/2001/SMIL20/Language}'
    # Query string appended verbatim to every URL built from the SMIL document.
    _SMIL_URL_SUFFIX = '?v=2.11.3&fp=LNX 11,2,202,356&r=A&g=A'

    def _real_extract(self, url):
        """Build the info dict for a HowStuffWorks video page.

        ``content_id`` and ``clip_title`` are required keys of the embedded
        clip object (a missing one raises ``KeyError``); caption, still image
        and duration are optional and looked up with ``.get``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        clip_js = self._search_regex(
            r'(?s)var clip = ({.*?});', webpage, 'clip info')
        clip_info = self._parse_json(
            clip_js, display_id, transform_source=js_to_json)

        video_id = clip_info['content_id']
        formats = []

        m3u8_url = clip_info.get('m3u8')
        if m3u8_url and determine_ext(m3u8_url) == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', format_id='hls', fatal=True))

        flv_url = clip_info.get('flv_url')
        if flv_url:
            formats.append({
                'url': flv_url,
                'format_id': 'flv',
            })

        # "or []" also covers an explicit null value for the 'mp4' key.
        for video in clip_info.get('mp4') or []:
            # The bitrate only feeds optional metadata, so its absence must
            # not abort extraction of an otherwise usable format.
            bitrate = video.get('bitrate')
            mp4_format = {
                'url': video['src'],
                'format_id': 'mp4-%s' % bitrate if bitrate is not None else 'mp4',
            }
            if bitrate is not None:
                # Stringify first so a numeric JSON value works like '800k'.
                mp4_format['vbr'] = int_or_none(('%s' % bitrate).rstrip('k'))
            formats.append(mp4_format)

        if not formats:
            # Nothing usable in the clip object: fall back to the SMIL service.
            smil = self._download_xml(
                'http://services.media.howstuffworks.com/videos/%s/smil-service.smil' % video_id,
                video_id, 'Downloading video SMIL')

            http_base_meta = find_xpath_attr(
                smil,
                './{0}head/{0}meta'.format(self._SMIL_NS),
                'name',
                'httpBase')
            # Compare against None explicitly: an ElementTree element without
            # children is falsy even though it exists.
            http_base = (
                http_base_meta.get('content')
                if http_base_meta is not None else None)

            # Without a base URL no valid format URL can be built, so the
            # SMIL entries are skipped rather than emitted as 'None/...'.
            if http_base:
                for video in smil.findall(
                        './{0}body/{0}switch/{0}video'.format(self._SMIL_NS)):
                    src = video.get('src')
                    if not src:
                        continue
                    vbr = int_or_none(video.get('system-bitrate'), scale=1000)
                    formats.append({
                        'url': '%s/%s%s' % (http_base, src, self._SMIL_URL_SUFFIX),
                        # '%d' cannot format None, hence the fallback id.
                        'format_id': '%dk' % vbr if vbr is not None else 'smil',
                        'vbr': vbr,
                    })

        self._sort_formats(formats)

        return {
            'id': '%s' % video_id,
            'display_id': display_id,
            'title': unescapeHTML(clip_info['clip_title']),
            'description': unescapeHTML(clip_info.get('caption')),
            'thumbnail': clip_info.get('video_still_url'),
            'duration': int_or_none(clip_info.get('duration')),
            'formats': formats,
        }