]>
Commit | Line | Data |
---|---|---|
49aeedb8 S |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import re | |
5 | ||
2e90dff2 | 6 | from .common import InfoExtractor |
3395958d | 7 | from ..utils import ( |
0a5baf9c RA |
8 | clean_html, |
9 | get_element_by_class, | |
3395958d | 10 | parse_duration, |
0a5baf9c | 11 | strip_or_none, |
3395958d PH |
12 | unified_strdate, |
13 | ) | |
49aeedb8 | 14 | |
2e90dff2 | 15 | |
class LibsynIE(InfoExtractor):
    """Extractor for episodes served by the Libsyn HTML5 embed player."""

    # ``mainurl`` captures the URL up to and including the numeric episode id,
    # so trailing player options (height/width/theme/...) are not part of it.
    _VALID_URL = r'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'

    _TESTS = [{
        'url': 'http://html5-player.libsyn.com/embed/episode/id/6385796/',
        'md5': '2a55e75496c790cdeb058e7e6c087746',
        'info_dict': {
            'id': '6385796',
            'ext': 'mp3',
            'title': "Champion Minded - Developing a Growth Mindset",
            # description fetched using another request:
            # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796
            # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.',
            'upload_date': '20180320',
            'thumbnail': 're:^https?://.*',
        },
    }, {
        'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
        'md5': '6c5cb21acd622d754d3b1a92b582ce42',
        'info_dict': {
            'id': '3727166',
            'ext': 'mp3',
            'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
            'upload_date': '20150818',
            'thumbnail': 're:^https?://.*',
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single embedded Libsyn episode.

        The embed page is downloaded and its ``var playlistItem = {...};``
        JSON block is parsed; the page markup is used as a fallback or
        supplement for the title, description and release date.

        :param url: a URL matching ``_VALID_URL``.
        :returns: dict with the keys ``id``, ``title``, ``description``,
            ``thumbnail``, ``upload_date``, ``duration`` and ``formats``.
        """
        # Only the canonical part of the URL (without player options) is
        # requested; the second group is the numeric episode id.
        url, video_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, video_id)

        data = self._parse_json(self._search_regex(
            r'var\s+playlistItem\s*=\s*({.+?});',
            webpage, 'JSON data block'), video_id)

        episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage)
        if not episode_title:
            # The result of this fallback lookup must be stored: otherwise
            # episode_title would still be empty/None when stripped below.
            episode_title = self._search_regex(
                [r'data-title="([^"]+)"', r'<title>(.+?)</title>'],
                webpage, 'episode title')
        episode_title = episode_title.strip()

        # The podcast title is optional: try the <h3> heading first, then the
        # element carrying the 'podcast-title' class.
        podcast_title = strip_or_none(clean_html(self._search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'podcast title',
            default=None) or get_element_by_class('podcast-title', webpage)))

        title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title

        # Each JSON key that carries a media URL becomes one format entry;
        # keys that are absent or empty are skipped.
        formats = []
        for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')):
            f_url = data.get(k)
            if not f_url:
                continue
            formats.append({
                'url': f_url,
                'format_id': format_id,
            })

        description = self._html_search_regex(
            r'<p\s+id="info_text_body">(.+?)</p>', webpage,
            'description', default=None)
        if description:
            # Strip non-breaking and normal spaces
            description = description.replace('\u00A0', ' ').strip()

        # Prefer the release date shown in the page; fall back to the JSON.
        release_date = unified_strdate(self._search_regex(
            r'<div class="release_date">Released: ([^<]+)<',
            webpage, 'release date', default=None) or data.get('release_date'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': data.get('thumbnail_url'),
            'upload_date': release_date,
            'duration': parse_duration(data.get('duration')),
            'formats': formats,
        }