]>
Commit | Line | Data |
---|---|---|
762958d5 | 1 | # coding: utf-8 |
2317e6b2 AS |
2 | from __future__ import unicode_literals |
3 | ||
934858ad | 4 | import itertools |
15251481 | 5 | import json |
d6039175 PH |
6 | import re |
7 | ||
934858ad | 8 | from .common import InfoExtractor, SearchInfoExtractor |
d6039175 | 9 | from ..utils import ( |
934858ad | 10 | compat_urllib_parse, |
9c15e9de | 11 | compat_urlparse, |
9c15e9de | 12 | clean_html, |
7217e148 | 13 | int_or_none, |
d6039175 PH |
14 | ) |
15 | ||
9c15e9de | 16 | |
class YahooIE(InfoExtractor):
    """Information extractor for Yahoo screen and movies video pages.

    The page is downloaded to discover the long content id, which is then
    used to ask the YQL video streams service for metadata and stream URLs.
    """

    IE_DESC = 'Yahoo screen and movies'
    _VALID_URL = r'(?P<url>https?://(?:.+?\.)?(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
    _TESTS = [
        {
            'url': 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
            'md5': '4962b075c08be8690a922ee026d05e69',
            'info_dict': {
                'id': '2d25e626-2378-391f-ada0-ddaf1417e588',
                'ext': 'mp4',
                'title': 'Julian Smith & Travis Legg Watch Julian Smith',
                'description': 'Julian and Travis watch Julian Smith',
                'duration': 6863,
            },
        },
        {
            'url': 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html',
            'md5': 'd6e6fc6e1313c608f316ddad7b82b306',
            'info_dict': {
                'id': 'd1dedf8c-d58c-38c3-8963-e899929ae0a9',
                'ext': 'mp4',
                'title': 'Codefellas - The Cougar Lies with Spanish Moss',
                'description': 'md5:66b627ab0a282b26352136ca96ce73c1',
                'duration': 151,
            },
        },
        {
            'url': 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed',
            'md5': '60e8ac193d8fb71997caa8fce54c6460',
            'info_dict': {
                'id': '4fe78544-8d48-39d8-97cd-13f205d9fcdb',
                'ext': 'mp4',
                'title': "Yahoo Saves 'Community'",
                'description': 'md5:4d4145af2fd3de00cbb6c1d664105053',
                'duration': 170,
            }
        },
        {
            'url': 'https://tw.screen.yahoo.com/taipei-opinion-poll/選情站報-街頭民調-台北市篇-102823042.html',
            'md5': '92a7fdd8a08783c68a174d7aa067dde8',
            'info_dict': {
                'id': '7a23b569-7bea-36cb-85b9-bd5301a0a1fb',
                'ext': 'mp4',
                'title': '選情站報 街頭民調 台北市篇',
                'description': '選情站報 街頭民調 台北市篇',
                'duration': 429,
            }
        },
        {
            'url': 'https://uk.screen.yahoo.com/editor-picks/cute-raccoon-freed-drain-using-091756545.html ',
            'md5': '0b51660361f0e27c9789e7037ef76f4b',
            'info_dict': {
                'id': 'b3affa53-2e14-3590-852b-0e0db6cd1a58',
                'ext': 'mp4',
                'title': 'Cute Raccoon Freed From Drain\u00a0Using Angle Grinder',
                'description': 'md5:f66c890e1490f4910a9953c941dee944',
                'duration': 97,
            }
        },
    ]

    def _real_extract(self, url):
        """Download the video page, find its long content id and extract.

        The id reported for the video is the numeric id from the URL when
        the page embeds a ``mediaItems`` JSON object, and the long content
        id otherwise.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        # Only the part matched by the 'url' group is requested, so anything
        # after '.html' (e.g. a query string) is dropped.
        page_url = match.group('url')
        webpage = self._download_webpage(page_url, video_id)

        items_json = self._search_regex(
            r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
            default=None)

        if items_json is not None:
            media_obj = json.loads(items_json)['mediaItems']['query']['results']['mediaObj'][0]
            # The 'meta' field is not always in the video webpage, so only
            # the id is taken here and the rest is requested separately.
            return self._get_info(media_obj['id'], video_id, webpage)

        # No embedded media items: look for the content id in the page.
        content_id = self._search_regex(
            [
                r'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"',
                r'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"',
                r'"first_videoid"\s*:\s*"([^"]+)"',
            ],
            webpage, 'content ID')
        return self._get_info(content_id, content_id, webpage)

    def _get_info(self, long_id, video_id, webpage):
        """Query the YQL streams service and build the info dictionary.

        long_id  -- content id used in the YQL query
        video_id -- id reported in the result and used for download messages
        webpage  -- page HTML; searched for the region and, when the
                    metadata has no thumbnail, for the og thumbnail
        """
        region = self._search_regex(
            r'"region"\s*:\s*"([^"]+)"', webpage, 'region',
            fatal=False, default='US')
        yql = ('SELECT * FROM yahoo.media.video.streams WHERE id="%s"'
               ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="%s"'
               ' AND protocol="http"' % (long_id, region))
        params = compat_urllib_parse.urlencode({
            'q': yql,
            'env': 'prod',
            'format': 'json',
        })
        response = self._download_json(
            'http://video.query.yahoo.com/v1/public/yql?' + params,
            video_id, 'Downloading video info')
        media_obj = response['query']['results']['mediaObj'][0]
        meta = media_obj['meta']

        formats = []
        for stream in media_obj['streams']:
            fmt = {
                'width': int_or_none(stream.get('width')),
                'height': int_or_none(stream.get('height')),
                'tbr': int_or_none(stream.get('bitrate')),
            }

            host = stream['host']
            path = stream['path']
            if not host.startswith('rtmp'):
                # Plain HTTP stream: host and path combine into one URL.
                fmt['url'] = compat_urlparse.urljoin(host, path)
            else:
                # RTMP stream: the host is the URL and the path is passed
                # along separately as the play path.
                fmt['url'] = host
                fmt['play_path'] = path
                fmt['ext'] = 'flv'
            formats.append(fmt)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': meta['title'],
            'formats': formats,
            'description': clean_html(meta['description']),
            # Fall back to the page's og thumbnail when the metadata has none.
            'thumbnail': meta.get('thumbnail') or self._og_search_thumbnail(webpage),
            'duration': int_or_none(meta.get('duration')),
        }
d6039175 | 151 | |
934858ad | 152 | |
class YahooNewsIE(YahooIE):
    """Information extractor for news.yahoo.com video pages.

    Only the way the long content id is located differs from the parent
    class; the inherited ``_get_info`` does the actual extraction.
    """

    IE_NAME = 'yahoo:news'
    _VALID_URL = r'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'

    _TESTS = [{
        'url': 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html',
        'md5': '67010fdf3a08d290e060a4dd96baa07b',
        'info_dict': {
            'id': '104538833',
            'ext': 'mp4',
            'title': 'China Moses Is Crazy About the Blues',
            'description': 'md5:9900ab8cd5808175c7b3fe55b979bed0',
        },
    }]

    def _real_extract(self, url):
        """Find the page's ``contentId`` and delegate to ``_get_info``."""
        # The numeric id from the URL is the id reported for the video.
        video_id = re.match(self._VALID_URL, url).group('id')
        webpage = self._download_webpage(url, video_id)
        content_id = self._search_regex(
            r'contentId: \'(.+?)\',', webpage, 'long id')
        return self._get_info(content_id, video_id, webpage)
befd88b7 JMF |
174 | |
175 | ||
class YahooSearchIE(SearchInfoExtractor):
    """Search extractor that turns Yahoo video search results into a playlist."""

    IE_DESC = 'Yahoo screen search'
    _MAX_RESULTS = 1000
    IE_NAME = 'screen.yahoo:search'
    _SEARCH_KEY = 'yvsearch'
    # Step applied to the `b` (offset) query parameter for each result page.
    _PAGE_SIZE = 30

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query.

        Result pages are downloaded one after another until ``n`` entries
        have been collected, a page comes back empty, or the paging metadata
        (``m['last']`` / ``m['total']``) reports the end of the results.

        query -- the search terms
        n     -- maximum number of entries to return

        Returns a playlist dictionary whose entries are URL results handed
        to the 'Yahoo' extractor.
        """
        entries = []
        for pagenum in itertools.count(0):
            result_url = 'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (
                compat_urllib_parse.quote_plus(query), pagenum * self._PAGE_SIZE)
            info = self._download_json(
                result_url, query,
                note='Downloading results page ' + str(pagenum + 1))
            m = info['m']
            results = info['results']

            for r in results:
                if len(entries) >= n:
                    break
                mobj = re.search(r'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r)
                if mobj is None:
                    # Result without a screen.yahoo.com link: skip it instead
                    # of failing on the missing match.
                    continue
                entries.append(
                    self.url_result('http://' + mobj.group('url'), 'Yahoo'))

            # Count collected entries rather than reading the inner loop's
            # index, which is unbound when a page has no results at all.
            if (not results
                    or len(entries) >= n
                    or m['last'] >= (m['total'] - 1)):
                break

        return {
            '_type': 'playlist',
            'id': query,
            'entries': entries,
        }