yt_dlp/extractor/thisamericanlife.py

   1 from .common import InfoExtractor
   2
   3
   4 class ThisAmericanLifeIE(InfoExtractor):
   5     _VALID_URL = r'https?://(?:www\.)?thisamericanlife\.org/(?:radio-archives/episode/|play_full\.php\?play=)(?P<id>\d+)'
   6     _TESTS = [{
   7         'url': 'http://www.thisamericanlife.org/radio-archives/episode/487/harper-high-school-part-one',
   8         'md5': '8f7d2da8926298fdfca2ee37764c11ce',
   9         'info_dict': {
  10             'id': '487',
  11             'ext': 'm4a',
  12             'title': '487: Harper High School, Part One',
  13             'description': 'md5:ee40bdf3fb96174a9027f76dbecea655',
  14             'thumbnail': r're:^https?://.*\.jpg$',
  15         },
  16     }, {
  17         'url': 'http://www.thisamericanlife.org/play_full.php?play=487',
  18         'only_matching': True,
  19     }]
  20
  21     def _real_extract(self, url):
  22         video_id = self._match_id(url)
  23
  24         webpage = self._download_webpage(
  25             f'http://www.thisamericanlife.org/radio-archives/episode/{video_id}', video_id)
  26
  27         return {
  28             'id': video_id,
  29             'url': f'http://stream.thisamericanlife.org/{video_id}/stream/{video_id}_64k.m3u8',
  30             'protocol': 'm3u8_native',
  31             'ext': 'm4a',
  32             'acodec': 'aac',
  33             'vcodec': 'none',
  34             'abr': 64,
  35             'title': self._html_search_meta(r'twitter:title', webpage, 'title', fatal=True),
  36             'description': self._html_search_meta(r'description', webpage, 'description'),
  37             'thumbnail': self._og_search_thumbnail(webpage),
  38         }