]>
Commit | Line | Data |
---|---|---|
2317e6b2 AS |
1 | from __future__ import unicode_literals |
2 | ||
934858ad | 3 | import itertools |
15251481 | 4 | import json |
d6039175 PH |
5 | import re |
6 | ||
934858ad | 7 | from .common import InfoExtractor, SearchInfoExtractor |
d6039175 | 8 | from ..utils import ( |
934858ad | 9 | compat_urllib_parse, |
9c15e9de | 10 | compat_urlparse, |
9c15e9de | 11 | clean_html, |
7217e148 | 12 | int_or_none, |
d6039175 PH |
13 | ) |
14 | ||
9c15e9de | 15 | |
class YahooIE(InfoExtractor):
    """Extractor for video pages hosted on screen.yahoo.com."""

    IE_DESC = 'Yahoo screen'
    # At least one digit is required so that a match can never produce an
    # empty video id.
    _VALID_URL = r'http://screen\.yahoo\.com/.*?-(?P<id>\d+)\.html'
    _TESTS = [
        {
            'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
            'md5': '4962b075c08be8690a922ee026d05e69',
            'info_dict': {
                'id': '214727115',
                'ext': 'mp4',
                'title': 'Julian Smith & Travis Legg Watch Julian Smith',
                'description': 'Julian and Travis watch Julian Smith',
            },
        },
        {
            'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
            'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
            'info_dict': {
                'id': '103000935',
                'ext': 'mp4',
                'title': 'Codefellas - The Cougar Lies with Spanish Moss',
                'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
            },
        },
    ]

    def _real_extract(self, url):
        """Resolve a screen.yahoo.com page URL to its info dict.

        The numeric id is taken from the URL, the page is downloaded, and the
        embedded ``mediaItems`` JSON is parsed to find the long media id that
        ``_get_info`` needs.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        items_json = self._search_regex(
            r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE)
        items = json.loads(items_json)
        info = items['mediaItems']['query']['results']['mediaObj'][0]
        # The 'meta' field is not always in the video webpage, we request it
        # from another page
        long_id = info['id']
        return self._get_info(long_id, video_id)

    def _get_info(self, long_id, video_id):
        """Fetch stream and meta data for ``long_id`` and build the info dict.

        ``long_id`` is the media id used in the YQL query; ``video_id`` is the
        short id reported in the result and used when downloading.

        Returns a dict with the keys 'id', 'title', 'formats', 'description'
        and 'thumbnail'. 'description' and 'thumbnail' are None when the
        service does not provide them; a missing title still raises KeyError.
        """
        query = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
                 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
                 ' AND protocol="http"' % long_id)
        data = compat_urllib_parse.urlencode({
            'q': query,
            'env': 'prod',
            'format': 'json',
        })
        query_result = self._download_json(
            'http://video.query.yahoo.com/v1/public/yql?' + data,
            video_id, 'Downloading video info')
        info = query_result['query']['results']['mediaObj'][0]
        meta = info['meta']

        formats = []
        for s in info['streams']:
            format_info = {
                'width': int_or_none(s.get('width')),
                'height': int_or_none(s.get('height')),
                'tbr': int_or_none(s.get('bitrate')),
            }

            host = s['host']
            path = s['path']
            if host.startswith('rtmp'):
                # RTMP streams keep host and play path separate.
                format_info.update({
                    'url': host,
                    'play_path': path,
                    'ext': 'flv',
                })
            else:
                format_info['url'] = compat_urlparse.urljoin(host, path)
            formats.append(format_info)

        self._sort_formats(formats)

        # Description and thumbnail are optional extras: their absence must
        # not abort an extraction whose streams were found.
        description = meta.get('description')
        return {
            'id': video_id,
            'title': meta['title'],
            'formats': formats,
            'description': clean_html(description) if description else None,
            'thumbnail': meta.get('thumbnail'),
        }
d6039175 | 101 | |
934858ad | 102 | |
befd88b7 JMF |
class YahooNewsIE(YahooIE):
    """Extractor for news.yahoo.com video pages.

    Only the lookup of the long media id differs from YahooIE; the info dict
    itself is built by the inherited ``_get_info``.
    """

    IE_NAME = 'yahoo:news'
    # At least one digit is required so that a match can never produce an
    # empty video id.
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d+)\.html'

    _TEST = {
        'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        'md5': '67010fdf3a08d290e060a4dd96baa07b',
        'info_dict': {
            'id': '104538833',
            'ext': 'mp4',
            'title': 'China Moses Is Crazy About the Blues',
            'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
    }

    # Overwrite YahooIE properties we don't want
    _TESTS = []

    def _real_extract(self, url):
        """Resolve a news.yahoo.com video URL to its info dict.

        The long media id is read from the ``contentId`` value found in the
        downloaded page and handed to ``_get_info``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        long_id = self._search_regex(
            r'contentId: \'(.+?)\',', webpage, 'long id')
        return self._get_info(long_id, video_id)
127 | ||
128 | ||
class YahooSearchIE(SearchInfoExtractor):
    """Search extractor that collects screen.yahoo.com results as a playlist."""

    IE_DESC = 'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = 'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are requested 30 offsets apart until ``n`` entries have
        been collected, a page comes back empty, or the paging metadata
        reports that the last result has been reached.

        Returns a playlist dict whose 'entries' are url_result items that
        point at the 'Yahoo' extractor.
        """
        entries = []
        for pagenum in itertools.count(0):
            result_url = (
                'http://video.search.yahoo.com/search/'
                '?p=%s&fr=screen&o=js&gs=0&b=%d'
                % (compat_urllib_parse.quote_plus(query), pagenum * 30))
            info = self._download_json(
                result_url, query,
                note='Downloading results page %d' % (pagenum + 1))
            m = info['m']
            results = info['results']

            for r in results:
                if len(entries) >= n:
                    break
                mobj = re.search(
                    r'(?P<url>screen\.yahoo\.com/.*?-\d+\.html)"', r)
                if mobj is None:
                    # Result markup without a usable video link: skip it
                    # instead of crashing on a None match.
                    continue
                entries.append(
                    self.url_result('http://' + mobj.group('url'), 'Yahoo'))

            # Count collected entries rather than relying on the inner loop
            # index, which is undefined (or stale) when a page is empty.
            if (len(entries) >= n or not results
                    or m['last'] >= (m['total'] - 1)):
                break

        return {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }