]>
Commit | Line | Data |
---|---|---|
934858ad | 1 | import itertools |
d6039175 PH |
2 | import json |
3 | import re | |
4 | ||
934858ad | 5 | from .common import InfoExtractor, SearchInfoExtractor |
d6039175 | 6 | from ..utils import ( |
934858ad | 7 | compat_urllib_parse, |
9c15e9de JMF |
8 | compat_urlparse, |
9 | determine_ext, | |
10 | clean_html, | |
d6039175 PH |
11 | ) |
12 | ||
9c15e9de | 13 | |
class YahooIE(InfoExtractor):
    """Extractor for single videos hosted on Yahoo Screen."""
    IE_DESC = u'Yahoo screen'
    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
    _TESTS = [
        {
            u'url': u'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
            u'file': u'214727115.flv',
            u'info_dict': {
                u'title': u'Julian Smith & Travis Legg Watch Julian Smith',
                u'description': u'Julian and Travis watch Julian Smith',
            },
            u'params': {
                # Requires rtmpdump
                u'skip_download': True,
            },
        },
        {
            u'url': u'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
            u'file': u'103000935.flv',
            u'info_dict': {
                u'title': u'Codefellas - The Cougar Lies with Spanish Moss',
                u'description': u'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
            },
            u'params': {
                # Requires rtmpdump
                u'skip_download': True,
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the page, parse the embedded YVIDEO_INIT_ITEMS JSON and
        build an info dict with every available stream as a format."""
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)

        # The page embeds its media description as a JSON assignment.
        items = json.loads(self._search_regex(
            r'YVIDEO_INIT_ITEMS = ({.*?});$',
            webpage, u'items', flags=re.MULTILINE))
        media = items['mediaItems']['query']['results']['mediaObj'][0]
        meta = media['meta']

        formats = []
        for stream in media['streams']:
            fmt = {
                'width': stream.get('width'),
                'height': stream.get('height'),
                'bitrate': stream.get('bitrate'),
            }
            host = stream['host']
            path = stream['path']
            if host.startswith('rtmp'):
                # RTMP streams keep host and play path separate.
                fmt.update({
                    'url': host,
                    'play_path': path,
                    'ext': 'flv',
                })
            else:
                direct_url = compat_urlparse.urljoin(host, path)
                fmt['url'] = direct_url
                fmt['ext'] = determine_ext(direct_url)
            formats.append(fmt)
        # Worst to best, so the last entry is the preferred one.
        formats.sort(key=lambda f: (f['height'], f['width']))

        result = {
            'id': video_id,
            'title': meta['title'],
            'formats': formats,
            'description': clean_html(meta['description']),
            'thumbnail': meta['thumbnail'],
        }
        # TODO: Remove when #980 has been merged
        result.update(formats[-1])

        return result
d6039175 | 90 | |
class YahooSearchIE(SearchInfoExtractor):
    """Search extractor for Yahoo Screen (yvsearch:... queries)."""
    IE_DESC = u'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = u'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        playlist = {
            '_type': 'playlist',
            'id': query,
            'entries': []
        }
        # Results come 30 per page; keep fetching pages until we have
        # enough entries or the service reports the last page.
        for pagenum in itertools.count(0):
            result_url = u'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
            webpage = self._download_webpage(
                result_url, query,
                note='Downloading results page ' + str(pagenum + 1))
            info = json.loads(webpage)
            m = info[u'm']
            results = info[u'results']

            for idx, result_html in enumerate(results):
                if pagenum * 30 + idx >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', result_html)
                playlist['entries'].append(
                    self.url_result('http://' + mobj.group('url'), 'Yahoo'))
            # Stop once the requested count is reached or no pages remain
            # (idx still holds the last index from the inner loop).
            if pagenum * 30 + idx >= n or m[u'last'] >= m[u'total'] - 1:
                break

        return playlist