]>
Commit | Line | Data |
---|---|---|
49aeedb8 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
3395958d | 4 | import json |
49aeedb8 S |
5 | import re |
6 | ||
2e90dff2 | 7 | from .common import InfoExtractor |
3395958d PH |
8 | from ..utils import ( |
9 | parse_duration, | |
10 | unified_strdate, | |
11 | ) | |
49aeedb8 | 12 | |
2e90dff2 | 13 | |
class LibsynIE(InfoExtractor):
    """Information extractor for episodes served by the Libsyn HTML5 embed player."""

    # 'mainurl' captures the embed URL up to and including the numeric episode
    # id (any trailing player options such as height/width/theme are left
    # out); 'id' captures the numeric episode id itself.
    _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'

    _TESTS = [{
        'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
        'md5': '2a55e75496c790cdeb058e7e6c087746',
        'info_dict': {
            'id': '6385796',
            'ext': 'mp3',
            'title': "Champion Minded - Developing a Growth Mindset",
            'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
            'upload_date': '20180320',
            'thumbnail': 're:^https?://.*',
        },
    }, {
        'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
        'md5': '6c5cb21acd622d754d3b1a92b582ce42',
        'info_dict': {
            'id': '3727166',
            'ext': 'mp3',
            'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
            'upload_date': '20150818',
            'thumbnail': 're:^https?://.*',
        }
    }]

    def _real_extract(self, url):
        """Extract metadata and media URLs for a single embedded episode.

        The embed page is downloaded from the 'mainurl' part of ``url``.
        Titles, description and release date are scraped from the HTML;
        media URLs, thumbnail and duration come from the ``playlistItem``
        JSON object embedded in the page's script.

        Returns a dict with the keys 'id', 'title', 'description',
        'thumbnail', 'upload_date', 'duration' and 'formats'.

        NOTE(review): the episode title and the JSON data block are looked up
        without a default, so they are presumably mandatory and a miss is
        expected to abort extraction -- confirm against
        InfoExtractor._search_regex.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('id')
        # Request the bare embed URL rather than the one decorated with
        # player options.
        url = m.group('mainurl')
        webpage = self._download_webpage(url, video_id)

        # The podcast (show) title is optional; the episode title is not.
        podcast_title = self._search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None)
        if podcast_title:
            podcast_title = podcast_title.strip()
        episode_title = self._search_regex(
            r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title').strip()

        # Prefix the episode title with the show title when one was found.
        title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title

        description = self._html_search_regex(
            r'<p\s+id="info_text_body">(.+?)</p>', webpage,
            'description', default=None)
        if description:
            # Strip non-breaking and normal spaces
            description = description.replace('\u00A0', ' ').strip()
        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False))

        data_json = self._search_regex(
            r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block')
        data = json.loads(data_json)

        # Only URLs actually present in the player data become formats, so a
        # missing entry no longer aborts the whole extraction with a KeyError
        # while the other URL is still usable. Order (main, then libsyn) is
        # kept as before.
        formats = []
        for key, format_id in (('media_url', 'main'), ('media_url_libsyn', 'libsyn')):
            media_url = data.get(key)
            if not media_url:
                continue
            formats.append({
                'url': media_url,
                'format_id': format_id,
            })
        thumbnail = data.get('thumbnail_url')
        duration = parse_duration(data.get('duration'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': release_date,
            'duration': duration,
            'formats': formats,
        }