[yt-dlp.git] / yt_dlp / extractor / googlepodcasts.py

# coding: utf-8
from __future__ import unicode_literals

import json

from .common import InfoExtractor
from ..utils import (
    clean_podcast_url,
    int_or_none,
    try_get,
    urlencode_postdata,
)


class GooglePodcastsBaseIE(InfoExtractor):
    """Shared helpers for the Google Podcasts extractors."""

    _VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'

    def _batch_execute(self, func_id, video_id, params):
        """Call the ``batchexecute`` endpoint and return the decoded payload.

        ``func_id`` and the JSON-serialised ``params`` are wrapped in the
        nested-list envelope sent as the ``f.req`` form field. ``video_id``
        is only forwarded to ``_download_json``.

        The raw response is trimmed to the span running from the first ``[``
        to the last ``]`` before it is parsed (presumably to drop a non-JSON
        prefix -- confirm against a live response). Element ``[0][2]`` of the
        parsed envelope is itself JSON text and is decoded a second time;
        that decoded value is what gets returned.
        """
        return json.loads(self._download_json(
            'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
            video_id, data=urlencode_postdata({
                'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
            }), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2])

    def _extract_episode(self, episode):
        """Build an info dict from one positional episode record.

        ``episode`` is an indexable sequence; the slots read here are the
        ones listed in the returned dict. ``id``, ``title`` and ``url`` are
        required, so a record lacking them still raises. Every other field
        is optional metadata and is read tolerantly: a missing or
        non-indexable slot yields ``None`` instead of aborting the whole
        extraction (which, for a feed, would discard every other episode).
        """
        def optional(index):
            # try_get swallows the lookup errors raised by short records.
            return try_get(episode, lambda x: x[index])

        return {
            'id': episode[4][3],
            'title': episode[8],
            'url': clean_podcast_url(episode[13]),
            'thumbnail': optional(2),
            'description': optional(9),
            'creator': optional(14),
            'timestamp': int_or_none(optional(11)),
            'duration': int_or_none(optional(12)),
            'series': optional(1),
        }


class GooglePodcastsIE(GooglePodcastsBaseIE):
    """Extractor for a single episode URL under ``/feed/<feed>/episode/<id>``."""

    IE_NAME = 'google:podcasts'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
        'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
        'info_dict': {
            'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
            'ext': 'mp3',
            'title': 'WWDTM New Year 2021',
            'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
            'upload_date': '20210102',
            'timestamp': 1609606800,
            'duration': 2901,
            'series': "Wait Wait... Don't Tell Me!",
        },
    }

    def _real_extract(self, url):
        """Fetch one episode via the ``oNjqVe`` call and return its info dict."""
        # The two URL path tokens (feed, then episode) are passed through
        # unchanged; the episode token also serves as the download id.
        feed_token, episode_token = self._match_valid_url(url).group('feed_url', 'id')
        payload = self._batch_execute(
            'oNjqVe', episode_token, [feed_token, episode_token])
        # The episode record sits in slot 1 of the decoded payload.
        return self._extract_episode(payload[1])


class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
    """Extractor for a whole feed URL, returned as a playlist of episodes."""

    IE_NAME = 'google:podcasts:feed'
    _VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
    _TEST = {
        'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
        'info_dict': {
            'title': "Wait Wait... Don't Tell Me!",
            'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
        },
        'playlist_mincount': 20,
    }

    def _real_extract(self, url):
        """Fetch the feed via the ``ncqJEe`` call and build a playlist result."""
        feed_token = self._match_id(url)
        payload = self._batch_execute('ncqJEe', feed_token, [feed_token])

        # Episode records are read from payload[1][0]; a missing slot
        # simply produces an empty playlist.
        episode_records = try_get(payload, lambda x: x[1][0]) or []
        entries = [self._extract_episode(record) for record in episode_records]

        # Feed-level metadata: slot 0 supplies the title, slot 2 the description.
        feed_meta = try_get(payload, lambda x: x[3]) or []
        title = try_get(feed_meta, lambda x: x[0])
        description = try_get(feed_meta, lambda x: x[2])
        return self.playlist_result(
            entries, playlist_title=title, playlist_description=description)
Commit	Line	Data
00dd0cd5	1	# coding: utf-8
	2	from __future__ import unicode_literals
	3
	4	import json
00dd0cd5	5
	6	from .common import InfoExtractor
	7	from ..utils import (
	8	clean_podcast_url,
	9	int_or_none,
	10	try_get,
	11	urlencode_postdata,
	12	)
	13
	14
	15	class GooglePodcastsBaseIE(InfoExtractor):
	16	_VALID_URL_BASE = r'https?://podcasts\.google\.com/feed/'
	17
	18	def _batch_execute(self, func_id, video_id, params):
	19	return json.loads(self._download_json(
	20	'https://podcasts.google.com/_/PodcastsUi/data/batchexecute',
	21	video_id, data=urlencode_postdata({
	22	'f.req': json.dumps([[[func_id, json.dumps(params), None, '1']]]),
	23	}), transform_source=lambda x: self._search_regex(r'(?s)(\[.+\])', x, 'data'))[0][2])
	24
	25	def _extract_episode(self, episode):
	26	return {
	27	'id': episode[4][3],
	28	'title': episode[8],
	29	'url': clean_podcast_url(episode[13]),
	30	'thumbnail': episode[2],
	31	'description': episode[9],
	32	'creator': try_get(episode, lambda x: x[14]),
	33	'timestamp': int_or_none(episode[11]),
	34	'duration': int_or_none(episode[12]),
	35	'series': episode[1],
	36	}
	37
	38
	39	class GooglePodcastsIE(GooglePodcastsBaseIE):
	40	IE_NAME = 'google:podcasts'
	41	_VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<feed_url>[^/]+)/episode/(?P<id>[^/?&#]+)'
	42	_TEST = {
	43	'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA/episode/MzBlNWRlN2UtOWE4Yy00ODcwLTk2M2MtM2JlMmUyNmViOTRh',
	44	'md5': 'fa56b2ee8bd0703e27e42d4b104c4766',
	45	'info_dict': {
	46	'id': '30e5de7e-9a8c-4870-963c-3be2e26eb94a',
	47	'ext': 'mp3',
	48	'title': 'WWDTM New Year 2021',
	49	'description': 'We say goodbye to 2020 with Christine Baranksi, Doug Jones, Jonna Mendez, and Kellee Edwards.',
	50	'upload_date': '20210102',
	51	'timestamp': 1609606800,
	52	'duration': 2901,
	53	'series': "Wait Wait... Don't Tell Me!",
	54	}
	55	}
	56
	57	def _real_extract(self, url):
5ad28e7f	58	b64_feed_url, b64_guid = self._match_valid_url(url).groups()
00dd0cd5	59	episode = self._batch_execute(
	60	'oNjqVe', b64_guid, [b64_feed_url, b64_guid])[1]
	61	return self._extract_episode(episode)
	62
	63
	64	class GooglePodcastsFeedIE(GooglePodcastsBaseIE):
	65	IE_NAME = 'google:podcasts:feed'
	66	_VALID_URL = GooglePodcastsBaseIE._VALID_URL_BASE + r'(?P<id>[^/?&#]+)/?(?:[?#&]|$)'
	67	_TEST = {
	68	'url': 'https://podcasts.google.com/feed/aHR0cHM6Ly9mZWVkcy5ucHIub3JnLzM0NDA5ODUzOS9wb2RjYXN0LnhtbA',
	69	'info_dict': {
	70	'title': "Wait Wait... Don't Tell Me!",
	71	'description': "NPR's weekly current events quiz. Have a laugh and test your news knowledge while figuring out what's real and what we've made up.",
	72	},
	73	'playlist_mincount': 20,
	74	}
	75
	76	def _real_extract(self, url):
	77	b64_feed_url = self._match_id(url)
	78	data = self._batch_execute('ncqJEe', b64_feed_url, [b64_feed_url])
	79
	80	entries = []
	81	for episode in (try_get(data, lambda x: x[1][0]) or []):
	82	entries.append(self._extract_episode(episode))
	83
	84	feed = try_get(data, lambda x: x[3]) or []
	85	return self.playlist_result(
	86	entries, playlist_title=try_get(feed, lambda x: x[0]),
	87	playlist_description=try_get(feed, lambda x: x[2]))