10 import xml
.etree
.ElementTree
19 from .extractor
.common
import InfoExtractor
, SearchInfoExtractor
21 from .extractor
.ard
import ARDIE
22 from .extractor
.arte
import ArteTvIE
23 from .extractor
.bandcamp
import BandcampIE
24 from .extractor
.bliptv
import BlipTVIE
, BlipTVUserIE
25 from .extractor
.comedycentral
import ComedyCentralIE
26 from .extractor
.collegehumor
import CollegeHumorIE
27 from .extractor
.dailymotion
import DailymotionIE
28 from .extractor
.depositfiles
import DepositFilesIE
29 from .extractor
.eighttracks
import EightTracksIE
30 from .extractor
.escapist
import EscapistIE
31 from .extractor
.facebook
import FacebookIE
32 from .extractor
.funnyordie
import FunnyOrDieIE
33 from .extractor
.gametrailers
import GametrailersIE
34 from .extractor
.generic
import GenericIE
35 from .extractor
.googleplus
import GooglePlusIE
36 from .extractor
.googlesearch
import GoogleSearchIE
37 from .extractor
.infoq
import InfoQIE
38 from .extractor
.justintv
import JustinTVIE
39 from .extractor
.keek
import KeekIE
40 from .extractor
.liveleak
import LiveLeakIE
41 from .extractor
.metacafe
import MetacafeIE
42 from .extractor
.mixcloud
import MixcloudIE
43 from .extractor
.mtv
import MTVIE
44 from .extractor
.myspass
import MySpassIE
45 from .extractor
.myvideo
import MyVideoIE
46 from .extractor
.nba
import NBAIE
47 from .extractor
.statigram
import StatigramIE
48 from .extractor
.photobucket
import PhotobucketIE
49 from .extractor
.pornotube
import PornotubeIE
50 from .extractor
.rbmaradio
import RBMARadioIE
51 from .extractor
.soundcloud
import SoundcloudIE
, SoundcloudSetIE
52 from .extractor
.spiegel
import SpiegelIE
53 from .extractor
.stanfordoc
import StanfordOpenClassroomIE
54 from .extractor
.steam
import SteamIE
55 from .extractor
.ted
import TEDIE
56 from .extractor
.tumblr
import TumblrIE
57 from .extractor
.ustream
import UstreamIE
58 from .extractor
.vbox7
import Vbox7IE
59 from .extractor
.vimeo
import VimeoIE
60 from .extractor
.vine
import VineIE
61 from .extractor
.worldstarhiphop
import WorldStarHipHopIE
62 from .extractor
.xnxx
import XNXXIE
63 from .extractor
.xvideos
import XVideosIE
64 from .extractor
.yahoo
import YahooIE
, YahooSearchIE
65 from .extractor
.youjizz
import YouJizzIE
66 from .extractor
.youku
import YoukuIE
67 from .extractor
.youporn
import YouPornIE
68 from .extractor
.youtube
import YoutubeIE
, YoutubePlaylistIE
, YoutubeSearchIE
, YoutubeUserIE
, YoutubeChannelIE
69 from .extractor
.zdf
import ZDFIE
105 class RedTubeIE(InfoExtractor
):
106 """Information Extractor for redtube"""
107 _VALID_URL
= r
'(?:http://)?(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
109 def _real_extract(self
,url
):
110 mobj
= re
.match(self
._VALID
_URL
, url
)
112 raise ExtractorError(u
'Invalid URL: %s' % url
)
114 video_id
= mobj
.group('id')
115 video_extension
= 'mp4'
116 webpage
= self
._download
_webpage
(url
, video_id
)
118 self
.report_extraction(video_id
)
120 video_url
= self
._html
_search
_regex
(r
'<source src="(.+?)" type="video/mp4">',
121 webpage
, u
'video URL')
123 video_title
= self
._html
_search
_regex
('<h1 class="videoTitle slidePanelMovable">(.+?)</h1>',
129 'ext': video_extension
,
130 'title': video_title
,
133 class InaIE(InfoExtractor
):
134 """Information Extractor for Ina.fr"""
135 _VALID_URL
= r
'(?:http://)?(?:www\.)?ina\.fr/video/(?P<id>I[0-9]+)/.*'
137 def _real_extract(self
,url
):
138 mobj
= re
.match(self
._VALID
_URL
, url
)
140 video_id
= mobj
.group('id')
141 mrss_url
='http://player.ina.fr/notices/%s.mrss' % video_id
142 video_extension
= 'mp4'
143 webpage
= self
._download
_webpage
(mrss_url
, video_id
)
145 self
.report_extraction(video_id
)
147 video_url
= self
._html
_search
_regex
(r
'<media:player url="(?P<mp4url>http://mp4.ina.fr/[^"]+\.mp4)',
148 webpage
, u
'video URL')
150 video_title
= self
._search
_regex
(r
'<title><!\[CDATA\[(?P<titre>.*?)]]></title>',
156 'ext': video_extension
,
157 'title': video_title
,
160 class HowcastIE(InfoExtractor
):
161 """Information Extractor for Howcast.com"""
162 _VALID_URL
= r
'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
164 def _real_extract(self
, url
):
165 mobj
= re
.match(self
._VALID
_URL
, url
)
167 video_id
= mobj
.group('id')
168 webpage_url
= 'http://www.howcast.com/videos/' + video_id
169 webpage
= self
._download
_webpage
(webpage_url
, video_id
)
171 self
.report_extraction(video_id
)
173 video_url
= self
._search
_regex
(r
'\'?
file\'?
: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
174 webpage, u'video URL')
176 video_title = self._html_search_regex(r'<meta content=(?:"([^
"]+)"|
\'([^
\']+)\') property=\'og
:title
\'',
179 video_description = self._html_search_regex(r'<meta content
=(?
:"([^"]+)"|\'([^\']+)\') name=\'description\'',
180 webpage, u'description', fatal=False)
182 thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
183 webpage, u'thumbnail', fatal=False)
189 'title': video_title,
190 'description': video_description,
191 'thumbnail': thumbnail,
195 class FlickrIE(InfoExtractor):
196 """Information Extractor for Flickr videos"""
197 _VALID_URL = r'(?:https?://)?(?:www\.)?flickr\.com/photos/(?P<uploader_id>[\w\-_@]+)/(?P<id>\d+).*'
199 def _real_extract(self, url):
200 mobj = re.match(self._VALID_URL, url)
202 video_id = mobj.group('id')
203 video_uploader_id = mobj.group('uploader_id')
204 webpage_url = 'http://www.flickr.com/photos/' + video_uploader_id + '/' + video_id
205 webpage = self._download_webpage(webpage_url, video_id)
207 secret = self._search_regex(r"photo_secret
: '(\w+)'", webpage, u'secret')
209 first_url = 'https://secure.flickr.com/apps/video/video_mtl_xml.gne?v=x&photo_id=' + video_id + '&secret=' + secret + '&bitrate=700&target=_self'
210 first_xml = self._download_webpage(first_url, video_id, 'Downloading first data webpage')
212 node_id = self._html_search_regex(r'<Item id="id">(\d+-\d+)</Item>',
213 first_xml, u'node_id')
215 second_url = 'https://secure.flickr.com/video_playlist.gne?node_id=' + node_id + '&tech=flash&mode=playlist&bitrate=700&secret=' + secret + '&rd=video.yahoo.com&noad=1'
216 second_xml = self._download_webpage(second_url, video_id, 'Downloading second data webpage')
218 self.report_extraction(video_id)
220 mobj = re.search(r'<STREAM APP="(.+?
)" FULLPATH="(.+?
)"', second_xml)
222 raise ExtractorError(u'Unable to extract video url')
223 video_url = mobj.group(1) + unescapeHTML(mobj.group(2))
225 video_title = self._html_search_regex(r'<meta property="og
:title
" content=(?:"([^
"]+)"|
\'([^
\']+)\')',
226 webpage, u'video title
')
228 video_description = self._html_search_regex(r'<meta
property="og:description" content
=(?
:"([^"]+)"|\'([^\']+)\')',
229 webpage, u'description', fatal=False)
231 thumbnail = self._html_search_regex(r'<meta property="og
:image
" content=(?:"([^
"]+)"|
\'([^
\']+)\')',
232 webpage, u'thumbnail
', fatal=False)
238 'title
': video_title,
239 'description
': video_description,
240 'thumbnail
': thumbnail,
241 'uploader_id
': video_uploader_id,
244 class TeamcocoIE(InfoExtractor):
245 _VALID_URL = r'http
://teamcoco\
.com
/video
/(?P
<url_title
>.*)'
247 def _real_extract(self, url):
248 mobj = re.match(self._VALID_URL, url)
250 raise ExtractorError(u'Invalid URL
: %s' % url)
251 url_title = mobj.group('url_title
')
252 webpage = self._download_webpage(url, url_title)
254 video_id = self._html_search_regex(r'<article
class="video" data
-id="(\d+?)"',
255 webpage, u'video
id')
257 self.report_extraction(video_id)
259 video_title = self._html_search_regex(r'<meta
property="og:title" content
="(.+?)"',
262 thumbnail = self._html_search_regex(r'<meta
property="og:image" content
="(.+?)"',
263 webpage, u'thumbnail
', fatal=False)
265 video_description = self._html_search_regex(r'<meta
property="og:description" content
="(.*?)"',
266 webpage, u'description
', fatal=False)
268 data_url = 'http
://teamcoco
.com
/cvp
/2.0/%s.xml
' % video_id
269 data = self._download_webpage(data_url, video_id, 'Downloading data webpage
')
271 video_url = self._html_search_regex(r'<file type="high".*?
>(.*?
)</file>',
278 'title
': video_title,
279 'thumbnail
': thumbnail,
280 'description
': video_description,
283 class XHamsterIE(InfoExtractor):
284 """Information Extractor for xHamster"""
285 _VALID_URL = r'(?
:http
://)?
(?
:www
.)?xhamster\
.com
/movies
/(?P
<id>[0-9]+)/.*\
.html
'
287 def _real_extract(self,url):
288 mobj = re.match(self._VALID_URL, url)
290 video_id = mobj.group('id')
291 mrss_url = 'http
://xhamster
.com
/movies
/%s/.html
' % video_id
292 webpage = self._download_webpage(mrss_url, video_id)
294 mobj = re.search(r'\'srv
\': \'(?P
<server
>[^
\']*)\',\s
*\'file\': \'(?P
<file>[^
\']+)\',', webpage)
296 raise ExtractorError(u'Unable to extract media URL
')
297 if len(mobj.group('server
')) == 0:
298 video_url = compat_urllib_parse.unquote(mobj.group('file'))
300 video_url = mobj.group('server
')+'/key
='+mobj.group('file')
301 video_extension = video_url.split('.')[-1]
303 video_title = self._html_search_regex(r'<title
>(?P
<title
>.+?
) - xHamster\
.com
</title
>',
306 # Can't see the description anywhere
in the UI
307 # video_description = self._html_search_regex(r'<span>Description: </span>(?P<description>[^<]+)',
308 # webpage, u'description', fatal=False)
309 # if video_description: video_description = unescapeHTML(video_description)
311 mobj
= re
.search(r
'hint=\'(?P
<upload_date_Y
>[0-9]{4}
)-(?P
<upload_date_m
>[0-9]{2}
)-(?P
<upload_date_d
>[0-9]{2}
) [0-9]{2}
:[0-9]{2}
:[0-9]{2}
[A
-Z
]{3,4}
\'', webpage)
313 video_upload_date = mobj.group('upload_date_Y
')+mobj.group('upload_date_m
')+mobj.group('upload_date_d
')
315 video_upload_date = None
316 self._downloader.report_warning(u'Unable to extract upload date
')
318 video_uploader_id = self._html_search_regex(r'<a href
=\'/user
/[^
>]+>(?P
<uploader_id
>[^
<]+)',
319 webpage, u'uploader
id', default=u'anonymous
')
321 video_thumbnail = self._search_regex(r'\'image
\':\'(?P
<thumbnail
>[^
\']+)\'',
322 webpage, u'thumbnail
', fatal=False)
327 'ext
': video_extension,
328 'title
': video_title,
329 # 'description
': video_description,
330 'upload_date
': video_upload_date,
331 'uploader_id
': video_uploader_id,
332 'thumbnail
': video_thumbnail
335 class HypemIE(InfoExtractor):
336 """Information Extractor for hypem"""
337 _VALID_URL = r'(?
:http
://)?
(?
:www\
.)?hypem\
.com
/track
/([^
/]+)/([^
/]+)'
339 def _real_extract(self, url):
340 mobj = re.match(self._VALID_URL, url)
342 raise ExtractorError(u'Invalid URL
: %s' % url)
343 track_id = mobj.group(1)
345 data = { 'ax': 1, 'ts': time.time() }
346 data_encoded = compat_urllib_parse.urlencode(data)
347 complete_url = url + "?" + data_encoded
348 request = compat_urllib_request.Request(complete_url)
349 response, urlh = self._download_webpage_handle(request, track_id, u'Downloading webpage
with the url
')
350 cookie = urlh.headers.get('Set
-Cookie
', '')
352 self.report_extraction(track_id)
354 html_tracks = self._html_search_regex(r'<script
type="application/json" id="displayList-data">(.*?
)</script
>',
355 response, u'tracks
', flags=re.MULTILINE|re.DOTALL).strip()
357 track_list = json.loads(html_tracks)
358 track = track_list[u'tracks
'][0]
360 raise ExtractorError(u'Hypemachine contained invalid JSON
.')
363 track_id = track[u"id"]
364 artist = track[u"artist"]
365 title = track[u"song"]
367 serve_url = "http://hypem.com/serve/source/%s/%s" % (compat_str(track_id), compat_str(key))
368 request = compat_urllib_request.Request(serve_url, "" , {'Content-Type': 'application/json'})
369 request.add_header('cookie
', cookie)
370 song_data_json = self._download_webpage(request, track_id, u'Downloading metadata
')
372 song_data = json.loads(song_data_json)
374 raise ExtractorError(u'Hypemachine contained invalid JSON
.')
375 final_url = song_data[u"url"]
387 def gen_extractors():
388 """ Return a list of an instance of every supported extractor.
389 The order does matter; the first extractor matched is the one handling the URL.
417 StanfordOpenClassroomIE(),
def get_info_extractor(ie_name):
    """Look up an info extractor class by its short name.

    The suffix ``'IE'`` is appended to ``ie_name`` and the resulting
    identifier is resolved in this module's global namespace.

    Raises ``KeyError`` when no global with that name exists.
    """
    class_name = ie_name + 'IE'
    module_namespace = globals()
    return module_namespace[class_name]