]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/googlepodcasts.py
[extractor] Common function `_match_valid_url`
[yt-dlp.git] / yt_dlp / extractor / googlepodcasts.py
CommitLineData
00dd0cd5 1# coding: utf-8
2from __future__ import unicode_literals
3
4import json
00dd0cd5 5
6from .common import InfoExtractor
7from ..utils import (
8 clean_podcast_url,
9 int_or_none,
10 try_get,
11 urlencode_postdata,
12)
13
14
class GooglePodcastsBaseIE(InfoExtractor):
    """Common plumbing shared by the Google Podcasts extractors.

    Provides the URL prefix the concrete extractors build their
    ``_VALID_URL`` from, a helper that calls the ``batchexecute`` endpoint,
    and a helper that maps one positional episode record to an info dict.
    """

    _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'

    def _batch_execute(self, func_id, video_id, params):
        """POST one call to the ``batchexecute`` endpoint and decode its payload.

        ``params`` is JSON-encoded and wrapped, together with ``func_id``, in
        the nested-list envelope sent as the ``f.req`` form field.  The value
        found at ``[0][2]`` of the downloaded response is itself a JSON
        string; it is decoded and returned.
        """
        envelope = [[[func_id, json.dumps(params), None, '1']]]
        form_data = urlencode_postdata({'f.req': json.dumps(envelope)})

        def isolate_json(raw):
            # The pattern is greedy with DOTALL, so it keeps everything from
            # the first '[' to the last ']' of the text it is given.
            # NOTE(review): presumably this drops a non-JSON prefix in the
            # response -- confirm against a live reply.
            return self._search_regex(r'(?s)(\[.+\])', raw, 'data')

        response = self._download_json(
            'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
            video_id, data=form_data, transform_source=isolate_json)
        return json.loads(response[0][2])

    def _extract_episode(self, episode):
        """Build an info dict from one positionally-indexed episode record.

        Every field is read by fixed position.  Only position 14 (the
        creator) goes through ``try_get``; all other positions are indexed
        directly and raise if the record is shorter than expected.
        """
        episode_id = episode[4][3]
        title = episode[8]
        media_url = clean_podcast_url(episode[13])
        thumbnail = episode[2]
        description = episode[9]
        creator = try_get(episode, lambda x: x[14])
        timestamp = int_or_none(episode[11])
        duration = int_or_none(episode[12])
        series = episode[1]

        return {
            'id': episode_id,
            'title': title,
            'url': media_url,
            'thumbnail': thumbnail,
            'description': description,
            'creator': creator,
            'timestamp': timestamp,
            'duration': duration,
            'series': series,
        }
37
38
class GooglePodcastsIE(GooglePodcastsBaseIE):
    """Extractor for a single episode URL (``.../feed/<feed>/episode/<id>``)."""

    IE_NAME = 'google:podcasts'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
        'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
        'info_dict': {
            'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
            'ext': 'mp3',
            'title': 'WWDTM New Year 2021',
            'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
            'upload_date': '20210102',
            'timestamp': 1609606800,
            'duration': 2901,
            'series': "Wait Wait... Don't Tell Me!",
        },
    }

    def _real_extract(self, url):
        """Fetch one episode record and convert it to an info dict.

        The two URL path components captured by ``_VALID_URL`` are passed
        through unchanged as the parameters of the ``oNjqVe`` call; the
        episode record is taken from index 1 of the decoded result.
        """
        mobj = self._match_valid_url(url)
        # Named lookup; the pattern defines exactly these two groups, in
        # this order.
        b64_feed_url, b64_guid = mobj.group('feed_url', 'id')

        result = self._batch_execute(
            'oNjqVe', b64_guid, [b64_feed_url, b64_guid])
        episode = result[1]
        return self._extract_episode(episode)
62
63
class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
    """Extractor for a whole feed URL (``.../feed/<id>``), returned as a playlist."""

    IE_NAME = 'google:podcasts:feed'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
        'info_dict': {
            'title': "Wait Wait... Don't Tell Me!",
            'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
        },
        'playlist_mincount': 20,
    }

    def _real_extract(self, url):
        """Fetch a feed and return its episodes as a playlist result.

        Episode records are read from ``data[1][0]`` and feed metadata from
        ``data[3]`` (title at index 0, description at index 2).  Each lookup
        goes through ``try_get`` and falls back to an empty list / ``None``
        when the position is absent.
        """
        b64_feed_url = self._match_id(url)
        data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])

        episodes = try_get(data, lambda x: x[1][0]) or []
        entries = [self._extract_episode(item) for item in episodes]

        feed = try_get(data, lambda x: x[3]) or []
        title = try_get(feed, lambda x: x[0])
        description = try_get(feed, lambda x: x[2])
        return self.playlist_result(
            entries, playlist_title=title, playlist_description=description)