import itertools
import re

from .common import SearchInfoExtractor


class GoogleSearchIE(SearchInfoExtractor):
    """Search extractor that scrapes Google's video search result pages.

    Results are fetched page by page from ``www.google.com/search`` and every
    matched result link is handed to ``self.url_result``.
    """

    IE_DESC = 'Google Video search'
    IE_NAME = 'video.google:search'
    # Search prefix for this extractor (the test URL below is 'gvsearch15:...').
    _SEARCH_KEY = 'gvsearch'
    _TESTS = [{
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }]
    # Sent as ``num`` and used as the stride for the ``start`` offset.
    _PAGE_SIZE = 100

    def _search_results(self, query):
        """Lazily yield one ``url_result`` per link found for *query*.

        Pages are requested one after another; iteration stops after the
        first page that does not contain an element with ``id="pnnext"``.

        Args:
            query: The search terms, passed to Google as the ``q`` parameter.

        Yields:
            Whatever ``self.url_result(url)`` returns for each matched href.
        """
        for pagenum in itertools.count():
            # NOTE(review): the request goes over plain HTTP; consider
            # switching to https once confirmed the markup is identical.
            webpage = self._download_webpage(
                'http://www.google.com/search', f'gvsearch:{query}',
                note=f'Downloading result page {pagenum + 1}',
                query={
                    'tbm': 'vid',
                    'q': query,
                    'start': pagenum * self._PAGE_SIZE,
                    'num': self._PAGE_SIZE,
                    'hl': 'en',
                })

            # Result links: the first <a href> directly inside a <div> whose
            # class attribute is exactly "dXiKIc".
            # NOTE(review): the href is passed on verbatim (no HTML-entity
            # unescaping) -- confirm this is what url_result expects.
            for url in re.findall(r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', webpage):
                yield self.url_result(url)

            # Presumably "pnnext" is the next-page link; without it there is
            # nothing further to fetch.
            if not re.search(r'id="pnnext"', webpage):
                return