]>
Commit | Line | Data |
---|---|---|
1cbd4106 S |
1 | from __future__ import unicode_literals |
2 | ||
ca9e02dc AG |
3 | import re |
4 | ||
5 | from .common import InfoExtractor | |
245023a8 S |
6 | from ..compat import compat_str |
7 | from ..utils import int_or_none | |
ca9e02dc AG |
8 | |
9 | ||
class PyvideoIE(InfoExtractor):
    """Extractor for talk pages on pyvideo.org.

    A talk URL has the form ``pyvideo.org/<category>/<id>``.  Every media
    link found for the talk becomes one entry of the returned playlist.
    """

    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'

    _TESTS = [{
        'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
        'info_dict': {
            'id': 'become-a-logging-expert-in-30-minutes',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
        'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
        'info_dict': {
            'id': '2542',
            'ext': 'm4v',
            'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
        },
    }]

    def _real_extract(self, url):
        """Build a playlist of the media attached to one pyvideo.org talk.

        The talk's JSON record is requested from the pyvideo/data GitHub
        repository first (non-fatally).  If that request yields nothing
        usable, the talk's HTML page is scraped for its "Media URL" links
        instead.

        Returns the result of ``self.playlist_result(entries, video_id)``.
        """
        mobj = re.match(self._VALID_URL, url)
        category = mobj.group('category')
        video_id = mobj.group('id')

        entries = []

        # fatal=False: a failed download must not abort extraction, because
        # the webpage fallback below can still succeed.
        data = self._download_json(
            'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
            % (category, video_id), video_id, fatal=False)

        if data:
            # Tolerate a record whose 'videos' key is absent or null: such a
            # record contributes no entries instead of raising
            # KeyError/TypeError.
            for video in data.get('videos') or []:
                video_url = video.get('url')
                if not video_url:
                    continue
                if video.get('type') == 'youtube':
                    # Delegate YouTube links to the Youtube extractor.
                    entries.append(self.url_result(video_url, 'Youtube'))
                else:
                    # Direct media link: the metadata comes from the talk
                    # record itself, not from the per-video item.
                    entries.append({
                        'id': compat_str(data.get('id') or video_id),
                        'url': video_url,
                        'title': data['title'],
                        'description': data.get('description') or data.get('summary'),
                        'thumbnail': data.get('thumbnail_url'),
                        'duration': int_or_none(data.get('duration')),
                    })
        else:
            # No JSON record available: scrape the talk page.
            webpage = self._download_webpage(url, video_id)
            title = self._og_search_title(webpage)
            # Everything between "Media URL:" and the closing </li> holds the
            # anchors pointing at the actual media.
            media_urls = self._search_regex(
                r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
            for m in re.finditer(
                    r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
                media_url = m.group('url')
                if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
                    entries.append(self.url_result(media_url, 'Youtube'))
                else:
                    entries.append({
                        'id': video_id,
                        'url': media_url,
                        'title': title,
                    })

        return self.playlist_result(entries, video_id)