]>
Commit | Line | Data |
---|---|---|
ca9e02dc AG |
1 | import re |
2 | ||
3 | from .common import InfoExtractor | |
245023a8 | 4 | from ..utils import int_or_none |
ca9e02dc AG |
5 | |
6 | ||
class PyvideoIE(InfoExtractor):
    """Extractor for talk pages on pyvideo.org.

    Metadata is read from the JSON files in the ``pyvideo/data`` GitHub
    repository; when that download yields nothing, the talk's HTML page is
    scraped for its "Media URL" links instead. The result is always a
    playlist, because a single talk can list several media URLs.
    """

    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'

    _TESTS = [{
        'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
        'info_dict': {
            'id': 'become-a-logging-expert-in-30-minutes',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
        'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
        'info_dict': {
            'id': '2542',
            'ext': 'm4v',
            'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
        },
    }]

    def _real_extract(self, url):
        """Return a playlist of every media entry listed for the talk at *url*.

        The playlist id is the ``id`` group matched by ``_VALID_URL``.
        """
        mobj = self._match_valid_url(url)
        category = mobj.group('category')
        video_id = mobj.group('id')

        # The download is non-fatal on purpose: a falsy result sends us to
        # the HTML fallback below rather than aborting the extraction.
        data = self._download_json(
            f'https://raw.githubusercontent.com/pyvideo/data/master/{category}/videos/{video_id}.json',
            video_id, fatal=False)

        if data:
            entries = self._entries_from_data(data, video_id)
        else:
            entries = self._entries_from_webpage(url, video_id)

        return self.playlist_result(entries, video_id)

    def _entries_from_data(self, data, video_id):
        """Build playlist entries from the talk's JSON metadata dict."""
        entries = []
        # ``videos`` may be missing or null in the JSON; treat that as "no
        # media" instead of raising, in keeping with the non-fatal download.
        for video in data.get('videos') or []:
            video_url = video.get('url')
            if not video_url:
                continue
            if video.get('type') == 'youtube':
                # Hand YouTube links over to the Youtube extractor.
                entries.append(self.url_result(video_url, 'Youtube'))
                continue
            entries.append({
                'id': str(data.get('id') or video_id),
                'url': video_url,
                'title': data['title'],
                'description': data.get('description') or data.get('summary'),
                'thumbnail': data.get('thumbnail_url'),
                'duration': int_or_none(data.get('duration')),
            })
        return entries

    def _entries_from_webpage(self, url, video_id):
        """Build playlist entries by scraping the "Media URL" list of the talk page."""
        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        media_urls = self._search_regex(
            r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')

        entries = []
        for m in re.finditer(
                r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
            media_url = m.group('url')
            if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
                entries.append(self.url_result(media_url, 'Youtube'))
            else:
                # Any other link is used as a direct media URL.
                entries.append({
                    'id': video_id,
                    'url': media_url,
                    'title': title,
                })
        return entries