]>
Commit | Line | Data |
---|---|---|
ccf9114e PH |
1 | from __future__ import unicode_literals |
2 | ||
3fc03845 PH |
3 | import itertools |
4 | import re | |
5 | ||
6 | from .common import SearchInfoExtractor | |
1cc79574 | 7 | from ..compat import ( |
3fc03845 PH |
8 | compat_urllib_parse, |
9 | ) | |
10 | ||
11 | ||
class GoogleSearchIE(SearchInfoExtractor):
    """Search extractor that collects hits from Google's video search.

    Queries are addressed with the ``gvsearch`` search key, for example
    ``gvsearch15:python language``.
    """

    IE_NAME = 'video.google:search'
    IE_DESC = 'Google Video search'
    _SEARCH_KEY = 'gvsearch'
    _MAX_RESULTS = 1000
    _TEST = {
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }

    def _get_n_results(self, query, n):
        """Return a playlist dict holding at most ``n`` results for ``query``.

        Result pages are downloaded one after another until either ``n``
        entries have been gathered or the current page no longer contains
        a "next page" (``pnnext``) element.
        """
        hit_pattern = r'<h3 class="r"><a href="([^"]+)"'
        collected = []

        for page_index in itertools.count():
            # The ``start`` parameter advances in steps of ten per page.
            page_url = (
                'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
                % (compat_urllib_parse.quote_plus(query), page_index * 10))
            page = self._download_webpage(
                page_url, 'gvsearch:' + query,
                note='Downloading result page ' + str(page_index + 1))

            # Skip playlists: only hits that have a matching
            # "vidthumb<position>" element (1-based) on the page are kept.
            collected.extend(
                {'_type': 'url', 'url': hit.group(1)}
                for position, hit in enumerate(
                    re.finditer(hit_pattern, page), 1)
                if re.search(r'id="vidthumb%d"' % position, page))

            # Keep paging only while more results are wanted and available.
            if len(collected) < n and re.search(r'id="pnnext"', page):
                continue

            return {
                '_type': 'playlist',
                'id': query,
                'title': query,
                'entries': collected[:n],
            }