]>
Commit | Line | Data |
---|---|---|
2317e6b2 AS |
1 | from __future__ import unicode_literals |
2 | ||
934858ad | 3 | import itertools |
15251481 | 4 | import json |
d6039175 PH |
5 | import re |
6 | ||
934858ad | 7 | from .common import InfoExtractor, SearchInfoExtractor |
d6039175 | 8 | from ..utils import ( |
934858ad | 9 | compat_urllib_parse, |
9c15e9de | 10 | compat_urlparse, |
9c15e9de | 11 | clean_html, |
7217e148 | 12 | int_or_none, |
d6039175 PH |
13 | ) |
14 | ||
9c15e9de | 15 | |
class YahooIE(InfoExtractor):
    """Extractor for video pages on screen.yahoo.com and movies.yahoo.com."""

    IE_DESC = 'Yahoo screen and movies'
    _VALID_URL = r'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
    _TESTS = [
        {
            'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
            'md5': '4962b075c08be8690a922ee026d05e69',
            'info_dict': {
                'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
                'ext': 'mp4',
                'title': 'Julian Smith & Travis Legg Watch Julian Smith',
                'description': 'Julian and Travis watch Julian Smith',
            },
        },
        {
            'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
            'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
            'info_dict': {
                'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
                'ext': 'mp4',
                'title': 'Codefellas - The Cougar Lies with Spanish Moss',
                'description': 'Agent Topple\'s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?',
            },
        },
        {
            'url': 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html',
            'md5': '410b7104aa9893b765bc22787a22f3d9',
            'info_dict': {
                'id': '516ed8e2-2c4f-339f-a211-7a8b49d30845',
                'ext': 'mp4',
                'title': 'The World Loves Spider-Man',
                'description': '''People all over the world are celebrating the release of \"The Amazing Spider-Man 2.\" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''',
            }
        },
        {
            'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
            'md5': '60e8ac193d8fb71997caa8fce54c6460',
            'info_dict': {
                'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
                'ext': 'mp4',
                'title': "Yahoo Saves 'Community'",
                'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
            }
        },
    ]

    def _real_extract(self, url):
        """Find the long content ID on the video page and build the info dict.

        The URL is first reduced to the part matched by ``_VALID_URL`` (this
        drops anything trailing ``.html``, such as a query string), and the
        numeric ID taken from it is used while downloading the page.
        """
        match = re.match(self._VALID_URL, url)
        display_id, page_url = match.group('id', 'url')
        page = self._download_webpage(page_url, display_id)

        media_items = self._search_regex(
            r'mediaItems: ({.*?})$', page, 'items', flags=re.MULTILINE,
            default=None)
        if media_items is not None:
            # The 'meta' field is not always present in the page itself, so
            # only the ID is taken from here; the rest is requested from
            # another endpoint by _get_info().
            media_obj = json.loads(media_items)['mediaItems']['query']['results']['mediaObj'][0]
            return self._get_info(media_obj['id'], display_id, page)

        # No embedded media items: look for the content ID directly, and use
        # it as the reported video ID as well.
        content_id_patterns = [
            r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
            r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
            r'"first_videoid"\s*:\s*"([^"]+)"',
        ]
        content_id = self._search_regex(content_id_patterns, page, 'content ID')
        return self._get_info(content_id, content_id, page)

    def _get_info(self, long_id, video_id, webpage):
        """Query the YQL streams table for ``long_id`` and return an info dict.

        ``video_id`` is used for the download message and as the resulting
        'id'.  ``webpage`` is only consulted (via ``_og_search_thumbnail``)
        when the returned metadata carries no thumbnail.
        """
        yql = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
               ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
               ' AND protocol="http"' % long_id)
        params = compat_urllib_parse.urlencode({
            'q': yql,
            'env': 'prod',
            'format': 'json',
        })
        response = self._download_json(
            'http://video.query.yahoo.com/v1/public/yql?' + params,
            video_id, 'Downloading video info')
        media = response['query']['results']['mediaObj'][0]
        meta = media['meta']

        formats = []
        for stream in media['streams']:
            fmt = {
                'width': int_or_none(stream.get('width')),
                'height': int_or_none(stream.get('height')),
                'tbr': int_or_none(stream.get('bitrate')),
            }
            host, path = stream['host'], stream['path']
            if not host.startswith('rtmp'):
                # Non-RTMP stream: host and path combine into one URL.
                fmt['url'] = compat_urlparse.urljoin(host, path)
            else:
                # RTMP stream: the host is the URL, the path is the play path.
                fmt.update({
                    'url': host,
                    'play_path': path,
                    'ext': 'flv',
                })
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta['title'],
            'formats': formats,
            'description': clean_html(meta['description']),
            'thumbnail': meta.get('thumbnail') or self._og_search_thumbnail(webpage),
        }
d6039175 | 132 | |
934858ad | 133 | |
class YahooNewsIE(YahooIE):
    """Extractor for news.yahoo.com video pages; reuses YahooIE._get_info."""

    IE_NAME = 'yahoo:news'
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'

    _TESTS = [{
        'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        'md5': '67010fdf3a08d290e060a4dd96baa07b',
        'info_dict': {
            'id': '104538833',
            'ext': 'mp4',
            'title': 'China Moses Is Crazy About the Blues',
            'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
    }]

    def _real_extract(self, url):
        """Read the page's ``contentId`` and delegate to ``_get_info``.

        The numeric ID from the URL is kept as the reported video ID; the
        content ID found in the page is only used for the stream lookup.
        """
        numeric_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, numeric_id)
        content_id = self._search_regex(
            r'contentId: \'(.+?)\',', page, 'long id')
        return self._get_info(content_id, numeric_id, page)
befd88b7 JMF |
155 | |
156 | ||
class YahooSearchIE(SearchInfoExtractor):
    """Search extractor that turns Yahoo video search hits into a playlist."""

    IE_DESC = 'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = 'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are requested 30 hits at a time until ``n`` entries
        have been collected, a page comes back empty, or the paging
        metadata in ``info['m']`` says the last hit has been reached.

        Returns a playlist dict whose entries are URL results delegated to
        the 'Yahoo' extractor.
        """
        page_size = 30  # offset step used for the `b` query parameter
        entries = []
        for pagenum in itertools.count(0):
            result_url = (
                'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d'
                % (compat_urllib_parse.quote_plus(query), pagenum * page_size))
            info = self._download_json(
                result_url, query,
                note='Downloading results page ' + str(pagenum + 1))
            # NOTE(review): 'last'/'total' presumably hold the index of the
            # last hit returned and the overall hit count -- confirm.
            m = info['m']
            results = info['results']

            for r in results:
                if len(entries) >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                if mobj is None:
                    # Hit without a screen.yahoo.com link: nothing the
                    # 'Yahoo' extractor could handle, so skip it.
                    continue
                entries.append(
                    self.url_result('http://' + mobj.group('url'), 'Yahoo'))

            # Count collected entries rather than reusing the inner loop
            # index: the index is unbound when a page is empty, and it only
            # noticed a filled quota after fetching one more page.
            if (not results or len(entries) >= n
                    or m['last'] >= (m['total'] - 1)):
                break

        return {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }