]>
Commit | Line | Data |
---|---|---|
ccf9114e PH |
1 | from __future__ import unicode_literals |
2 | ||
3fc03845 PH |
3 | import itertools |
4 | import re | |
5 | ||
6 | from .common import SearchInfoExtractor | |
7 | from ..utils import ( | |
8 | compat_urllib_parse, | |
9 | ) | |
10 | ||
11 | ||
class GoogleSearchIE(SearchInfoExtractor):
    # Search extractor registered under the 'gvsearch' key; it scrapes the
    # HTML of Google's video search result pages.
    IE_NAME = 'video.google:search'
    IE_DESC = 'Google Video search'
    _SEARCH_KEY = 'gvsearch'
    _MAX_RESULTS = 1000

    def _get_n_results(self, query, n):
        """Return a playlist dict holding at most ``n`` search hits for ``query``.

        Result pages are downloaded one after another (the ``start`` offset
        grows by 10 per page) until either ``n`` entries have been gathered
        or the page no longer contains an element with ``id="pnnext"``.

        The returned dict has the keys ``_type`` ('playlist'), ``id`` and
        ``title`` (both set to ``query``) and ``entries``, a list of
        ``{'_type': 'url', 'url': ...}`` dicts truncated to ``n`` items.
        """
        found = []
        page_index = 0

        while True:
            search_url = (
                'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en'
                % (compat_urllib_parse.quote_plus(query), page_index * 10))
            progress_note = 'Downloading result page ' + str(page_index + 1)

            webpage = self._download_webpage(
                search_url, 'gvsearch:' + query, note=progress_note)

            # Hits are numbered from 1 so the position lines up with the
            # "vidthumbN" ids looked up below.
            hits = re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage)
            for position, link in enumerate(hits, 1):
                # Only keep hits that have a matching thumbnail id; hits
                # without one are treated as playlists and skipped.
                if re.search(r'id="vidthumb%d"' % position, webpage):
                    found.append({
                        '_type': 'url',
                        'url': link.group(1),
                    })

            has_next_page = re.search(r'id="pnnext"', webpage) is not None
            if len(found) >= n or not has_next_page:
                return {
                    '_type': 'playlist',
                    'id': query,
                    'title': query,
                    'entries': found[:n],
                }

            page_index += 1