]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/stitcher.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
15 class StitcherIE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?:www\.)?stitcher\.com/(?:podcast|show)/(?:[^/]+/)+e(?:pisode)?/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)'
18 'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true',
19 'md5': 'e9635098e0da10b21a0e2b85585530f6',
23 'title': 'Machine Learning Mastery and Cancer Clusters',
24 'description': 'md5:547adb4081864be114ae3831b4c2b42f',
26 'thumbnail': r
're:^https?://.*\.jpg',
27 'upload_date': '20180126',
28 'timestamp': 1516989316,
31 'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true',
34 'display_id': 'the-rare-hourlong-comedy-plus',
36 'title': "The CW's 'Crazy Ex-Girlfriend'",
37 'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17',
39 'thumbnail': r
're:^https?://.*\.jpg',
42 'skip_download': True,
44 'skip': 'Page Not Found',
47 'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true',
48 'only_matching': True,
50 'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true',
51 'only_matching': True,
53 'url': 'https://www.stitcher.com/show/threedom/episode/circles-on-a-stick-200212584',
54 'only_matching': True,
57 def _real_extract(self
, url
):
58 display_id
, audio_id
= re
.match(self
._VALID
_URL
, url
).groups()
60 resp
= self
._download
_json
(
61 'https://api.prod.stitcher.com/episode/' + audio_id
,
62 display_id
or audio_id
)
63 episode
= try_get(resp
, lambda x
: x
['data']['episodes'][0], dict)
65 raise ExtractorError(resp
['errors'][0]['message'], expected
=True)
67 title
= episode
['title'].strip()
68 audio_url
= episode
['audio_url']
71 show_id
= episode
.get('show_id')
72 if show_id
and episode
.get('classic_id') != -1:
73 thumbnail
= 'https://stitcher-classic.imgix.net/feedimages/%s.jpg' % show_id
77 'display_id': display_id
,
79 'description': clean_html(episode
.get('html_description') or episode
.get('description')),
80 'duration': int_or_none(episode
.get('duration')),
81 'thumbnail': thumbnail
,
84 'timestamp': int_or_none(episode
.get('date_created')),
85 'season_number': int_or_none(episode
.get('season')),
86 'season_id': str_or_none(episode
.get('season_id')),