]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/googlesearch.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / googlesearch.py
CommitLineData
3fc03845
PH
1import itertools
2import re
3
4from .common import SearchInfoExtractor
3fc03845
PH
5
6
class GoogleSearchIE(SearchInfoExtractor):
    """Search extractor registered under the ``gvsearch`` search key.

    Result pages are fetched from Google's search endpoint with the
    ``tbm=vid`` parameter, and every result link found on a page is
    yielded as a URL result.
    """

    IE_DESC = 'Google Video search'
    IE_NAME = 'video.google:search'
    _SEARCH_KEY = 'gvsearch'
    _TESTS = [{
        'url': 'gvsearch15:python language',
        'info_dict': {
            'id': 'python language',
            'title': 'python language',
        },
        'playlist_count': 15,
    }]
    # Sent as ``num`` and also used as the stride for the ``start`` offset.
    _PAGE_SIZE = 100

    def _search_results(self, query):
        """Lazily yield a URL result for each link on successive result pages.

        Pages are requested one after another, starting at offset 0, until a
        downloaded page no longer contains ``id="pnnext"``.
        """
        for page_index in itertools.count():
            request_params = {
                'tbm': 'vid',
                'q': query,
                'start': page_index * self._PAGE_SIZE,
                'num': self._PAGE_SIZE,
                'hl': 'en',
            }
            webpage = self._download_webpage(
                'http://www.google.com/search', f'gvsearch:{query}',
                note=f'Downloading result page {page_index + 1}',
                query=request_params)

            # The capture group is the href of the anchor that directly
            # follows a <div> carrying class="dXiKIc".
            result_links = re.findall(
                r'<div[^>]* class="dXiKIc"[^>]*><a href="([^"]+)"', webpage)
            for link in result_links:
                yield self.url_result(link)

            # No id="pnnext" in the markup (presumably the "next page"
            # control -- confirm against live markup): stop paginating.
            if re.search(r'id="pnnext"', webpage) is None:
                return