]>
Commit | Line | Data |
---|---|---|
ca9e02dc AG |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
245023a8 S |
4 | from ..compat import compat_str |
5 | from ..utils import int_or_none | |
ca9e02dc AG |
6 | |
7 | ||
class PyvideoIE(InfoExtractor):
    """Extractor for individual talk pages on pyvideo.org.

    The URL's category and talk slug are used to look up the talk's JSON
    record in the ``pyvideo/data`` GitHub repository.  When that lookup
    returns nothing usable, the talk's HTML page is scraped instead.  Either
    way the result is a playlist, because one talk may list several media
    URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'

    _TESTS = [{
        'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
        'info_dict': {
            'id': 'become-a-logging-expert-in-30-minutes',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
        'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
        'info_dict': {
            'id': '2542',
            'ext': 'm4v',
            'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
        },
    }]

    def _pyvideo_json_entries(self, data, video_id):
        """Build playlist entries from a talk's JSON record.

        ``data`` is the decoded JSON record; ``video_id`` is the URL slug,
        used as the entry id when the record carries no ``id`` of its own.
        Returns a list of entries (possibly empty).
        """
        entries = []
        # Tolerate a record whose 'videos' key is absent or null: yield an
        # empty playlist rather than crashing with KeyError/TypeError.
        for video in data.get('videos') or []:
            if not isinstance(video, dict):
                continue
            video_url = video.get('url')
            if not video_url:
                continue
            if video.get('type') == 'youtube':
                # Hand YouTube links over to the Youtube extractor.
                entries.append(self.url_result(video_url, 'Youtube'))
                continue
            # Any other media URL becomes an entry described by the
            # record-level metadata.
            entries.append({
                'id': compat_str(data.get('id') or video_id),
                'url': video_url,
                'title': data['title'],
                'description': data.get('description') or data.get('summary'),
                'thumbnail': data.get('thumbnail_url'),
                'duration': int_or_none(data.get('duration')),
            })
        return entries

    def _pyvideo_webpage_entries(self, url, video_id):
        """Build playlist entries by scraping the talk's HTML page.

        Collects every ``http`` link found in the page's "Media URL:" list
        item.  Returns a list of entries (possibly empty).
        """
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        media_urls = self._search_regex(
            r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')

        entries = []
        for m in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
            media_url = m.group('url')
            if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
                entries.append(self.url_result(media_url, 'Youtube'))
            else:
                entries.append({
                    'id': video_id,
                    'url': media_url,
                    'title': title,
                })
        return entries

    def _real_extract(self, url):
        """Return a playlist of all media entries found for the talk at ``url``."""
        mobj = self._match_valid_url(url)
        category = mobj.group('category')
        video_id = mobj.group('id')

        # The JSON record is requested with fatal=False; a falsy result
        # selects the HTML fallback below.
        data = self._download_json(
            'https://raw.githubusercontent.com/pyvideo/data/master/%s/videos/%s.json'
            % (category, video_id), video_id, fatal=False)

        if data:
            entries = self._pyvideo_json_entries(data, video_id)
        else:
            entries = self._pyvideo_webpage_entries(url, video_id)

        return self.playlist_result(entries, video_id)