]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/callin.py
2 from .common
import InfoExtractor
10 class CallinIE(InfoExtractor
):
11 _VALID_URL
= r
'https?://(?:www\.)?callin\.com/(episode)/(?P<id>[-a-zA-Z]+)'
13 'url': 'https://www.callin.com/episode/the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
15 'id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd',
16 'title': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
18 'display_id': 'the-title-ix-regime-and-the-long-march-through-EBfXYSrsjc',
19 'thumbnail': 're:https://.+\\.png',
20 'description': 'First episode',
21 'uploader': 'Wesley Yang',
22 'timestamp': 1639404128.65,
23 'upload_date': '20211213',
24 'uploader_id': 'wesyang',
25 'uploader_url': 'http://wesleyyang.substack.com',
26 'channel': 'Conversations in Year Zero',
27 'channel_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
28 'channel_url': 'https://callin.com/show/conversations-in-year-zero-oJNllRFSfx',
31 'categories': ['News & Politics', 'History', 'Technology'],
32 'cast': ['Wesley Yang', 'KC Johnson', 'Gabi Abramovich'],
33 'series': 'Conversations in Year Zero',
34 'series_id': '436d1f82ddeb30cd2306ea9156044d8d2cfdc3f1f1552d245117a42173e78553',
35 'episode': 'The Title IX Regime and the Long March Through and Beyond the Institutions',
37 'episode_id': '218b979630a35ead12c6fd096f2996c56c37e4d0dc1f6dc0feada32dcf7b31cd'
def try_get_user_name(self, d):
    """Build a display name from the 'first' and 'last' entries of *d*.

    Returns both parts joined by a single space when both keys are present,
    the first non-empty part when one of them is missing, and None when no
    usable part exists.
    """
    names = [d.get(n) for n in ('first', 'last')]
    # NOTE(review): the guard line was not visible in the reviewed listing;
    # `None in names` is reconstructed from the two consecutive returns --
    # confirm against the upstream file.
    if None in names:
        # The builtin next() only accepts its fallback positionally;
        # next(it, default=None) raises TypeError.
        return next((n for n in names if n), None)
    return ' '.join(names)
47 def _real_extract(self
, url
):
48 display_id
= self
._match
_id
(url
)
49 webpage
= self
._download
_webpage
(url
, display_id
)
51 next_data
= self
._search
_nextjs
_data
(webpage
, display_id
)
52 episode
= next_data
['props']['pageProps']['episode']
55 title
= (episode
.get('title')
56 or self
._og
_search
_title
(webpage
, fatal
=False)
57 or self
._html
_search
_regex
('<title>(.*?)</title>', webpage
, 'title'))
59 formats
= self
._extract
_m
3u8_formats
(url
, display_id
, ext
='ts')
60 self
._sort
_formats
(formats
)
62 show
= traverse_obj(episode
, ('show', 'title'))
63 show_id
= traverse_obj(episode
, ('show', 'id'))
66 app_slug
= (self
._html
_search
_regex
(
67 '<script\\s+src=["\']/_next/static/([-_a-zA-Z0-9]+)/_',
68 webpage
, 'app slug', fatal
=False) or next_data
.get('buildId'))
69 show_slug
= traverse_obj(episode
, ('show', 'linkObj', 'resourceUrl'))
70 if app_slug
and show_slug
and '/' in show_slug
:
71 show_slug
= show_slug
.rsplit('/', 1)[1]
72 show_json_url
= f
'https://www.callin.com/_next/data/{app_slug}/show/{show_slug}.json'
73 show_json
= self
._download
_json
(show_json_url
, display_id
, fatal
=False)
75 host
= (traverse_obj(show_json
, ('pageProps', 'show', 'hosts', 0))
76 or traverse_obj(episode
, ('speakers', 0)))
78 host_nick
= traverse_obj(host
, ('linkObj', 'resourceUrl'))
79 host_nick
= host_nick
.rsplit('/', 1)[1] if (host_nick
and '/' in host_nick
) else None
81 cast
= list(filter(None, [
82 self
.try_get_user_name(u
) for u
in
83 traverse_obj(episode
, (('speakers', 'callerTags'), ...)) or []
86 episode_list
= traverse_obj(show_json
, ('pageProps', 'show', 'episodes')) or []
87 episode_number
= next(
88 (len(episode_list
) - i
for (i
, e
) in enumerate(episode_list
) if e
.get('id') == id),
93 'display_id': display_id
,
96 'thumbnail': traverse_obj(episode
, ('show', 'photo')),
97 'description': episode
.get('description'),
98 'uploader': self
.try_get_user_name(host
) if host
else None,
99 'timestamp': episode
.get('publishedAt'),
100 'uploader_id': host_nick
,
101 'uploader_url': traverse_obj(show_json
, ('pageProps', 'show', 'url')),
103 'channel_id': show_id
,
104 'channel_url': traverse_obj(episode
, ('show', 'linkObj', 'resourceUrl')),
105 'duration': float_or_none(episode
.get('runtime')),
106 'view_count': int_or_none(episode
.get('plays')),
107 'categories': traverse_obj(episode
, ('show', 'categorizations', ..., 'name')),
108 'cast': cast
if cast
else None,
110 'series_id': show_id
,
112 'episode_number': episode_number
,