]>
Commit | Line | Data |
---|---|---|
3fc03845 PH |
1 | import itertools |
2 | import re | |
3 | ||
4 | from .common import SearchInfoExtractor | |
5 | from ..utils import ( | |
6 | compat_urllib_parse, | |
7 | ) | |
8 | ||
9 | ||
class GoogleSearchIE(SearchInfoExtractor):
    """Search extractor that scrapes Google Video search result pages."""

    IE_DESC = u'Google Video search'
    # Markup looked for in a result page to decide whether a next page exists.
    _MORE_PAGES_INDICATOR = r'id="pnnext" class="pn"'
    _MAX_RESULTS = 1000
    IE_NAME = u'video.google:search'
    _SEARCH_KEY = 'gvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until at least ``n``
        result links have been collected or the page no longer contains
        the "next page" marker.

        :param query: the search terms (URL-quoted before being sent).
        :param n: maximum number of entries to return.
        :returns: a ``playlist`` dict whose ``id`` is the query and whose
            ``entries`` list holds at most ``n`` ``url``-type dicts.
        """
        entries = []

        # Count pages from 0: ``start`` is the offset of the first hit on the
        # requested page (this code assumes 10 hits per page), so beginning
        # at 10 would silently skip the first page of results.
        for pagenum in itertools.count():
            result_url = u'http://www.google.com/search?tbm=vid&q=%s&start=%s&hl=en' % (
                compat_urllib_parse.quote_plus(query), pagenum * 10)
            webpage = self._download_webpage(
                result_url, u'gvsearch:' + query,
                note='Downloading result page ' + str(pagenum + 1))

            # Every result title is an <h3 class="r"> wrapping the target link.
            entries.extend(
                {'_type': 'url', 'url': mobj.group(1)}
                for mobj in re.finditer(r'<h3 class="r"><a href="([^"]+)"', webpage))

            # Stop on the number of hits actually collected rather than on the
            # page index: pages may contain fewer than 10 matching links.
            if len(entries) >= n or not re.search(self._MORE_PAGES_INDICATOR, webpage):
                return {
                    '_type': 'playlist',
                    'id': query,
                    # The last page may overshoot; never return more than n.
                    'entries': entries[:n],
                }