]>
Commit | Line | Data |
---|---|---|
49aeedb8 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
2e90dff2 | 6 | from .common import InfoExtractor |
49aeedb8 S |
7 | from ..utils import unified_strdate |
8 | ||
2e90dff2 | 9 | |
class LibsynIE(InfoExtractor):
    """Extractor for episodes served by the Libsyn HTML5 embed player."""

    # 'mainurl' captures the embed URL up to and including the numeric episode
    # id; 'id' captures the episode id itself. Anything after the id (player
    # options such as height/width/theme) is matched by neither group.
    _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'

    _TESTS = [{
        'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
        'md5': '443360ee1b58007bc3dcf09b41d093bb',
        'info_dict': {
            'id': '3377616',
            'ext': 'mp3',
            'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
            'description': 'md5:601cb790edd05908957dae8aaa866465',
            'upload_date': '20150220',
            'thumbnail': 're:^https?://.*',
        },
    }, {
        'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
        'md5': '6c5cb21acd622d754d3b1a92b582ce42',
        'info_dict': {
            'id': '3727166',
            'ext': 'mp3',
            'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
            'upload_date': '20150818',
            'thumbnail': 're:^https?://.*',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single Libsyn embed page.

        The page is downloaded from the canonical embed URL (the 'mainurl'
        group of _VALID_URL) and scraped with regular expressions.

        Returns a dict with the keys 'id', 'title', 'description',
        'thumbnail', 'upload_date' and 'formats'.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
        # Fetch the bare embed URL rather than the URL as given, so trailing
        # player options do not take part in the request.
        url = m.group('mainurl')
        webpage = self._download_webpage(url, video_id)

        # The page assigns the media location to the JavaScript variables
        # `mediaURL` and/or `mediaURLLibsyn`. Collect each distinct URL once,
        # keeping first-occurrence order so the resulting formats list is
        # reproducible between runs (iterating a set is not).
        media_urls = []
        for media_url in re.findall(
                r'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage):
            if media_url not in media_urls:
                media_urls.append(media_url)
        formats = [{
            'url': media_url,
        } for media_url in media_urls]

        # The podcast title is looked up with default=None and may be absent;
        # the episode title lookup has no default.
        podcast_title = self._search_regex(
            r'<h2>([^<]+)</h2>', webpage, 'podcast title', default=None)
        episode_title = self._search_regex(
            r'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage, 'episode title')

        # Prefix the episode title with the podcast title only when one was found.
        title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title

        description = self._html_search_regex(
            r'<div id="info_text_body">(.+?)</div>', webpage,
            'description', default=None)
        thumbnail = self._search_regex(
            r'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
            webpage, 'thumbnail', fatal=False)
        # NOTE(review): with fatal=False the search presumably yields None on a
        # miss, which is then handed to unified_strdate - confirm it accepts None.
        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': release_date,
            'formats': formats,
        }