]>
jfr.im git - yt-dlp.git/blob - youtube_dl/extractor/yahoo.py
1 from __future__
import unicode_literals
7 from . common
import InfoExtractor
, SearchInfoExtractor
16 class YahooIE ( InfoExtractor
):
17 IE_DESC
= 'Yahoo screen and movies'
18 _VALID_URL
= r
'(?P<url>https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html)'
21 'url' : 'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html' ,
22 'md5' : '4962b075c08be8690a922ee026d05e69' ,
24 'id' : '2d25e626-2378-391f-ada0-ddaf1417e588' ,
26 'title' : 'Julian Smith & Travis Legg Watch Julian Smith' ,
27 'description' : 'Julian and Travis watch Julian Smith' ,
31 'url' : 'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html' ,
32 'md5' : 'd6e6fc6e1313c608f316ddad7b82b306' ,
34 'id' : 'd1dedf8c-d58c-38c3-8963-e899929ae0a9' ,
36 'title' : 'Codefellas - The Cougar Lies with Spanish Moss' ,
37 'description' : 'Agent Topple \' s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?' ,
41 'url' : 'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html' ,
42 'md5' : '410b7104aa9893b765bc22787a22f3d9' ,
44 'id' : '516ed8e2-2c4f-339f-a211-7a8b49d30845' ,
46 'title' : 'The World Loves Spider-Man' ,
47 'description' : '''People all over the world are celebrating the release of \" The Amazing Spider-Man 2. \" We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''' ,
51 'url' : 'https://screen.yahoo.com/community/community-sizzle-reel-203225340.html?format=embed' ,
52 'md5' : '60e8ac193d8fb71997caa8fce54c6460' ,
54 'id' : '4fe78544-8d48-39d8-97cd-13f205d9fcdb' ,
56 'title' : "Yahoo Saves 'Community'" ,
57 'description' : 'md5:4d4145af2fd3de00cbb6c1d664105053' ,
62 def _real_extract ( self
, url
):
63 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
64 video_id
= mobj
. group ( 'id' )
65 url
= mobj
. group ( 'url' )
66 webpage
= self
._ download
_ webpage
( url
, video_id
)
68 items_json
= self
._ search
_ regex
(
69 r
'mediaItems: ( {.*?} )$' , webpage
, 'items' , flags
= re
. MULTILINE
,
71 if items_json
is None :
72 CONTENT_ID_REGEXES
= [
73 r
'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"' ,
74 r
'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"' ,
75 r
'"first_videoid"\s*:\s*"([^"]+)"' ,
77 long_id
= self
._ search
_ regex
( CONTENT_ID_REGEXES
, webpage
, 'content ID' )
80 items
= json
. loads ( items_json
)
81 info
= items
[ 'mediaItems' ][ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]
82 # The 'meta' field is not always in the video webpage, we request it
85 return self
._ get
_ info
( long_id
, video_id
, webpage
)
87 def _get_info ( self
, long_id
, video_id
, webpage
):
88 query
= ( 'SELECT * FROM yahoo.media.video.streams WHERE id=" %s "'
89 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'
90 ' AND protocol="http"' % long_id
)
91 data
= compat_urllib_parse
. urlencode ({
96 query_result
= self
._ download
_ json
(
97 'http://video.query.yahoo.com/v1/public/yql?' + data
,
98 video_id
, 'Downloading video info' )
99 info
= query_result
[ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]
103 for s
in info
[ 'streams' ]:
105 'width' : int_or_none ( s
. get ( 'width' )),
106 'height' : int_or_none ( s
. get ( 'height' )),
107 'tbr' : int_or_none ( s
. get ( 'bitrate' )),
112 if host
. startswith ( 'rtmp' ):
119 format_url
= compat_urlparse
. urljoin ( host
, path
)
120 format_info
[ 'url' ] = format_url
121 formats
. append ( format_info
)
123 self
._ sort
_ formats
( formats
)
127 'title' : meta
[ 'title' ],
129 'description' : clean_html ( meta
[ 'description' ]),
130 'thumbnail' : meta
[ 'thumbnail' ] if meta
. get ( 'thumbnail' ) else self
._ og
_ search
_ thumbnail
( webpage
),
134 class YahooNewsIE ( YahooIE
):
135 IE_NAME
= 'yahoo:news'
136 _VALID_URL
= r
'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'
139 'url' : 'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html' ,
140 'md5' : '67010fdf3a08d290e060a4dd96baa07b' ,
144 'title' : 'China Moses Is Crazy About the Blues' ,
145 'description' : 'md5:9900ab8cd5808175c7b3fe55b979bed0' ,
149 def _real_extract ( self
, url
):
150 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
151 video_id
= mobj
. group ( 'id' )
152 webpage
= self
._ download
_ webpage
( url
, video_id
)
153 long_id
= self
._ search
_ regex
( r
'contentId: \' (.+ ?
) \' , ', webpage, ' long id ')
154 return self._get_info(long_id, video_id, webpage)
157 class YahooSearchIE(SearchInfoExtractor):
158 IE_DESC = ' Yahoo screen search
'
160 IE_NAME = ' screen
. yahoo
: search
'
161 _SEARCH_KEY = ' yvsearch
'
163 def _get_n_results(self, query, n):
164 """Get a specified number of results for a query"""
166 for pagenum in itertools.count(0):
167 result_url = ' http
:// video
. search
. yahoo
. com
/ search
/ ?p
= %s& fr
= screen
& o
= js
& gs
= 0 & b
= %d ' % (compat_urllib_parse.quote_plus(query), pagenum * 30)
168 info = self._download_json(result_url, query,
169 note=' Downloading results page
'+str(pagenum+1))
171 results = info[' results
']
173 for (i, r) in enumerate(results):
174 if (pagenum * 30) + i >= n:
176 mobj = re.search(r' ( ?P
< url
> screen\
. yahoo\
. com
/.* ?
- \d
* ?\
. html
) "', r)
177 e = self.url_result('http://' + mobj.group('url'), 'Yahoo')
179 if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):