]>
Commit | Line | Data |
---|---|---|
ccf9114e PH |
1 | from __future__ import unicode_literals |
2 | ||
3fc03845 PH |
3 | import itertools |
4 | import re | |
5 | ||
6 | from .common import SearchInfoExtractor | |
3fc03845 PH |
7 | |
8 | ||
class GoogleSearchIE(SearchInfoExtractor):
    """Search extractor for Google's video search (``tbm=vid``) result pages."""

    IE_DESC = 'Google Video search'
    _MAX_RESULTS = 1000
    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
    _TEST = {
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }

    def _get_n_results(self, query, n):
        """Collect up to ``n`` video hits for ``query`` as a playlist result.

        Result pages are downloaded one after another, with the ``start``
        offset advancing by ten per page, until at least ``n`` entries have
        been gathered or a page carries no ``pnnext`` element.

        Returns a dict with ``_type`` 'playlist', ``id`` and ``title`` both
        set to ``query``, and ``entries`` holding at most ``n`` 'url' items.
        """
        found = []

        for page_index in itertools.count():
            webpage = self._download_webpage(
                'http://www.google.com/search',
                'gvsearch:' + query,
                note='Downloading result page %s' % (page_index + 1),
                query={
                    'tbm': 'vid',
                    'q': query,
                    'start': page_index * 10,
                    'hl': 'en',
                })

            hits = re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage)
            # Only keep hits whose 1-based position has a matching
            # "vidthumb<N>" element on the page; the others are skipped
            # (the original code labels them as playlists).
            found.extend(
                {
                    '_type': 'url',
                    'url': hit.group(1),
                }
                for thumb_no, hit in enumerate(hits, 1)
                if re.search(r'id="vidthumb%d"' % thumb_no, webpage))

            # Keep paging while more results are wanted and a next page exists.
            if len(found) < n and re.search(r'id="pnnext"', webpage):
                continue

            return {
                '_type': 'playlist',
                'id': query,
                'title': query,
                'entries': found[:n],
            }