]>
Commit | Line | Data |
---|---|---|
1cbd4106 S |
1 | from __future__ import unicode_literals |
2 | ||
ca9e02dc | 3 | import re |
c4d9e673 | 4 | import os |
ca9e02dc AG |
5 | |
6 | from .common import InfoExtractor | |
ca9e02dc AG |
7 | |
8 | ||
class PyvideoIE(InfoExtractor):
    """Information extractor for video pages on pyvideo.org.

    A page is handled in one of two ways:

    * if it contains a ``www.youtube.com/watch?v=...`` link, extraction is
      handed over to the ``Youtube`` extractor;
    * otherwise the media URL is taken from an HTML5 ``<source>`` tag or,
      failing that, from the link following the "Download" entry.
    """

    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/video/(?P<id>\d+)/(.*)'

    _TESTS = [
        {
            'url': 'http://pyvideo.org/video/1737/become-a-logging-expert-in-30-minutes',
            'md5': '520915673e53a5c5d487c36e0c4d85b5',
            'info_dict': {
                'id': '24_4WWkSmNo',
                'ext': 'webm',
                'title': 'Become a logging expert in 30 minutes',
                'description': 'md5:9665350d466c67fb5b1598de379021f7',
                'upload_date': '20130320',
                'uploader': 'Next Day Video',
                'uploader_id': 'NextDayVideo',
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'http://pyvideo.org/video/2542/gloriajw-spotifywitherikbernhardsson182m4v',
            'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
            'info_dict': {
                'id': '2542',
                'ext': 'm4v',
                'title': 'Gloriajw-SpotifyWithErikBernhardsson182',
            },
        },
    ]

    def _real_extract(self, url):
        """Extract the video behind a pyvideo.org page URL.

        Returns either a ``url_result`` pointing at the YouTube video linked
        from the page, or a dict with the ``id``, ``title`` and ``url`` of
        the directly hosted media file.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # Stop at quotes, angle brackets and whitespace so that only the URL
        # itself is captured; a greedy ``.*`` would also swallow the closing
        # attribute quote and any markup following the link on that line.
        m_youtube = re.search(
            r'(https?://www\.youtube\.com/watch\?v=[^"\'<>\s]+)', webpage)
        if m_youtube is not None:
            return self.url_result(m_youtube.group(1), 'Youtube')

        title = self._html_search_regex(
            r'<div class="section">\s*<h3(?:\s+class="[^"]*"[^>]*)?>([^>]+?)</h3>',
            webpage, 'title', flags=re.DOTALL)
        # Patterns are listed in order of preference: an HTML5 <source> tag
        # first, then the link following the "Download" definition term.
        video_url = self._search_regex(
            [r'<source src="(.*?)"', r'<dt>Download</dt>.*?<a href="(.+?)"'],
            webpage, 'video url', flags=re.DOTALL)

        return {
            'id': video_id,
            # The page title may carry a file extension (presumably because
            # it is the uploaded file's name); keep only the stem.
            'title': os.path.splitext(title)[0],
            'url': video_url,
        }