]>
Commit | Line | Data |
---|---|---|
f8286385 JMF |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
f8286385 JMF |
6 | |
7 | ||
class AolIE(InfoExtractor):
    """Extractor for on.aol.com video and playlist URLs.

    A single-video URL (or the internal ``aol-video:<id>`` form) is handed
    off as a ``5min:<id>`` URL result.  A playlist URL is expanded into one
    ``aol-video:<id>`` entry per related-video thumbnail link found on the
    playlist page, unless the ``noplaylist`` downloader parameter is set.
    """

    IE_NAME = 'on.aol.com'
    _VALID_URL = r'''(?x)
        (?:
            aol-video:|
            http://on\.aol\.com/
            (?:
                video/.*-|
                playlist/(?P<playlist_display_id>[^/?#]+?)-(?P<playlist_id>[0-9]+)[?#].*_videoid=
            )
        )
        (?P<id>[0-9]+)
        (?:$|\?)
    '''

    _TESTS = [{
        'url': 'http://on.aol.com/video/u-s--official-warns-of-largest-ever-irs-phone-scam-518167793?icid=OnHomepageC2Wide_MustSee_Img',
        'md5': '18ef68f48740e86ae94b98da815eec42',
        'info_dict': {
            'id': '518167793',
            'ext': 'mp4',
            'title': 'U.S. Official Warns Of \'Largest Ever\' IRS Phone Scam',
        },
        'add_ie': ['FiveMin'],
    }, {
        'url': 'http://on.aol.com/playlist/brace-yourself---todays-weirdest-news-152147?icid=OnHomepageC4_Omg_Img#_videoid=518184316',
        'info_dict': {
            'id': '152147',
            'title': 'Brace Yourself - Today\'s Weirdest News',
        },
        'playlist_mincount': 10,
    }]

    def _real_extract(self, url):
        """Resolve ``url`` to either a single-video URL result or a playlist.

        Returns the ``url_result`` for ``5min:<video id>`` when the URL has
        no playlist part or the ``noplaylist`` parameter is set; otherwise
        returns a ``playlist`` info dict whose entries are ``aol-video:``
        URLs handled again by this extractor.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        # None for plain video URLs and for the ``aol-video:`` form, since
        # the playlist groups live in a different regex alternative.
        playlist_id = mobj.group('playlist_id')
        if not playlist_id or self._downloader.params.get('noplaylist'):
            return self.url_result('5min:%s' % video_id)

        self.to_screen('Downloading playlist %s - add --no-playlist to just download video %s' % (playlist_id, video_id))

        webpage = self._download_webpage(url, playlist_id)
        title = self._html_search_regex(
            r'<h1 class="video-title[^"]*">(.+?)</h1>', webpage, 'title')
        playlist_html = self._search_regex(
            r"(?s)<ul\s+class='video-related[^']*'>(.*?)</ul>", webpage,
            'playlist HTML')
        # ``[^']*`` keeps each match inside a single href attribute value.
        # A greedy ``.*`` would swallow every anchor up to the last
        # ``videoid=`` on the same line and silently drop the earlier ones.
        entries = [{
            '_type': 'url',
            'url': 'aol-video:%s' % m.group('id'),
            'ie_key': 'Aol',
        } for m in re.finditer(
            r"<a\s+href='[^']*videoid=(?P<id>[0-9]+)'\s+class='video-thumb'>",
            playlist_html)]

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'display_id': mobj.group('playlist_display_id'),
            'title': title,
            'entries': entries,
        }