jfr.im git - yt-dlp.git/blame_incremental - yt_dlp/downloader/youtube_live

Commit	Line	Data
	1	import json
	2	import time
	3
	4	from .fragment import FragmentFD
	5	from ..compat import compat_urllib_error
	6	from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE
	7	from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get
	8
	9
	10	class YoutubeLiveChatFD(FragmentFD):
	11	""" Downloads YouTube live chats fragment by fragment """
	12
	13	FD_NAME = 'youtube_live_chat'
	14
	15	def real_download(self, filename, info_dict):
	16	video_id = info_dict['video_id']
	17	self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
	18	if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
	19	self.report_warning('Live chat download runs until the livestream ends. '
	20	'If you wish to download the video simultaneously, run a separate yt-dlp instance')
	21
	22	fragment_retries = self.params.get('fragment_retries', 0)
	23	test = self.params.get('test', False)
	24
	25	ctx = {
	26	'filename': filename,
	27	'live': True,
	28	'total_frags': None,
	29	}
	30
	31	ie = YT_BaseIE(self.ydl)
	32
	33	start_time = int(time.time() * 1000)
	34
	35	def dl_fragment(url, data=None, headers=None):
	36	http_headers = info_dict.get('http_headers', {})
	37	if headers:
	38	http_headers = http_headers.copy()
	39	http_headers.update(headers)
	40	return self._download_fragment(ctx, url, info_dict, http_headers, data)
	41
	42	def parse_actions_replay(live_chat_continuation):
	43	offset = continuation_id = click_tracking_params = None
	44	processed_fragment = bytearray()
	45	for action in live_chat_continuation.get('actions', []):
	46	if 'replayChatItemAction' in action:
	47	replay_chat_item_action = action['replayChatItemAction']
	48	offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
	49	processed_fragment.extend(
	50	json.dumps(action, ensure_ascii=False).encode() + b'\n')
	51	if offset is not None:
	52	continuation = try_get(
	53	live_chat_continuation,
	54	lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
	55	if continuation:
	56	continuation_id = continuation.get('continuation')
	57	click_tracking_params = continuation.get('clickTrackingParams')
	58	self._append_fragment(ctx, processed_fragment)
	59	return continuation_id, offset, click_tracking_params
	60
	61	def try_refresh_replay_beginning(live_chat_continuation):
	62	# choose the second option that contains the unfiltered live chat replay
	63	refresh_continuation = try_get(
	64	live_chat_continuation,
	65	lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
	66	if refresh_continuation:
	67	# no data yet but required to call _append_fragment
	68	self._append_fragment(ctx, b'')
	69	refresh_continuation_id = refresh_continuation.get('continuation')
	70	offset = 0
	71	click_tracking_params = refresh_continuation.get('trackingParams')
	72	return refresh_continuation_id, offset, click_tracking_params
	73	return parse_actions_replay(live_chat_continuation)
	74
	75	live_offset = 0
	76
	77	def parse_actions_live(live_chat_continuation):
	78	nonlocal live_offset
	79	continuation_id = click_tracking_params = None
	80	processed_fragment = bytearray()
	81	for action in live_chat_continuation.get('actions', []):
	82	timestamp = self.parse_live_timestamp(action)
	83	if timestamp is not None:
	84	live_offset = timestamp - start_time
	85	# compatibility with replay format
	86	pseudo_action = {
	87	'replayChatItemAction': {'actions': [action]},
	88	'videoOffsetTimeMsec': str(live_offset),
	89	'isLive': True,
	90	}
	91	processed_fragment.extend(
	92	json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
	93	continuation_data_getters = [
	94	lambda x: x['continuations'][0]['invalidationContinuationData'],
	95	lambda x: x['continuations'][0]['timedContinuationData'],
	96	]
	97	continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
	98	if continuation_data:
	99	continuation_id = continuation_data.get('continuation')
	100	click_tracking_params = continuation_data.get('clickTrackingParams')
	101	timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
	102	if timeout_ms is not None:
	103	time.sleep(timeout_ms / 1000)
	104	self._append_fragment(ctx, processed_fragment)
	105	return continuation_id, live_offset, click_tracking_params
	106
	107	def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
	108	count = 0
	109	while count <= fragment_retries:
	110	try:
	111	success = dl_fragment(url, request_data, headers)
	112	if not success:
	113	return False, None, None, None
	114	raw_fragment = self._read_fragment(ctx)
	115	try:
	116	data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
	117	except RegexNotFoundError:
	118	data = None
	119	if not data:
	120	data = json.loads(raw_fragment)
	121	live_chat_continuation = try_get(
	122	data,
	123	lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
	124	if info_dict['protocol'] == 'youtube_live_chat_replay':
	125	if frag_index == 1:
	126	continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
	127	else:
	128	continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
	129	elif info_dict['protocol'] == 'youtube_live_chat':
	130	continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
	131	return True, continuation_id, offset, click_tracking_params
	132	except compat_urllib_error.HTTPError as err:
	133	count += 1
	134	if count <= fragment_retries:
	135	self.report_retry_fragment(err, frag_index, count, fragment_retries)
	136	if count > fragment_retries:
	137	self.report_error('giving up after %s fragment retries' % fragment_retries)
	138	return False, None, None, None
	139
	140	self._prepare_and_start_frag_download(ctx, info_dict)
	141
	142	success = dl_fragment(info_dict['url'])
	143	if not success:
	144	return False
	145	raw_fragment = self._read_fragment(ctx)
	146	try:
	147	data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
	148	except RegexNotFoundError:
	149	return False
	150	continuation_id = try_get(
	151	data,
	152	lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'])
	153	# no data yet but required to call _append_fragment
	154	self._append_fragment(ctx, b'')
	155
	156	ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))
	157
	158	if not ytcfg:
	159	return False
	160	api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
	161	innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
	162	if not api_key or not innertube_context:
	163	return False
	164	visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
	165	if info_dict['protocol'] == 'youtube_live_chat_replay':
	166	url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
	167	chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
	168	elif info_dict['protocol'] == 'youtube_live_chat':
	169	url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
	170	chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
	171
	172	frag_index = offset = 0
	173	click_tracking_params = None
	174	while continuation_id is not None:
	175	frag_index += 1
	176	request_data = {
	177	'context': innertube_context,
	178	'continuation': continuation_id,
	179	}
	180	if frag_index > 1:
	181	request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
	182	if click_tracking_params:
	183	request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
	184	headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
	185	headers.update({'content-type': 'application/json'})
	186	fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
	187	success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
	188	url, frag_index, fragment_request_data, headers)
	189	else:
	190	success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
	191	chat_page_url, frag_index)
	192	if not success:
	193	return False
	194	if test:
	195	break
	196
	197	self._finish_frag_download(ctx, info_dict)
	198	return True
	199
	200	@staticmethod
	201	def parse_live_timestamp(action):
	202	action_content = dict_get(
	203	action,
	204	['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
	205	if not isinstance(action_content, dict):
	206	return None
	207	item = dict_get(action_content, ['item', 'bannerRenderer'])
	208	if not isinstance(item, dict):
	209	return None
	210	renderer = dict_get(item, [
	211	# text
	212	'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
	213	'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
	214	# ticker
	215	'liveChatTickerPaidMessageItemRenderer',
	216	'liveChatTickerSponsorItemRenderer',
	217	# banner
	218	'liveChatBannerRenderer',
	219	])
	220	if not isinstance(renderer, dict):
	221	return None
	222	parent_item_getters = [
	223	lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
	224	lambda x: x['contents'],
	225	]
	226	parent_item = try_get(renderer, parent_item_getters, dict)
	227	if parent_item:
	228	renderer = dict_get(parent_item, [
	229	'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
	230	'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
	231	])
	232	if not isinstance(renderer, dict):
	233	return None
	234	return int_or_none(renderer.get('timestampUsec'), 1000)

1

import json

2

import time

3

4

from .fragment import FragmentFD

5

from ..compat import compat_urllib_error

6

from ..extractor.youtube import YoutubeBaseInfoExtractor as YT_BaseIE

7

from ..utils import RegexNotFoundError, dict_get, int_or_none, try_get

8

9

10

class YoutubeLiveChatFD(FragmentFD):
    """Downloads YouTube live chats fragment by fragment.

    Each fragment is one response of the chat page / chat API. The chat
    actions found in a response are appended to the output file as
    newline-delimited JSON, one action per line.
    """

    FD_NAME = 'youtube_live_chat'

    def real_download(self, filename, info_dict):
        """Download the live chat described by ``info_dict`` into ``filename``.

        ``info_dict['protocol']`` selects the mode: ``'youtube_live_chat'``
        follows an ongoing stream, ``'youtube_live_chat_replay'`` walks the
        replay of a finished one. ``info_dict['url']`` is the page the initial
        continuation token and the ytcfg are extracted from.

        Returns True when the chat ran out of continuations (or after the
        first fragment when the ``test`` param is set). Returns False when the
        initial page, the configuration needed for the API, or any fragment
        could not be obtained.
        """
        video_id = info_dict['video_id']
        self.to_screen('[%s] Downloading live chat' % self.FD_NAME)
        if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat':
            self.report_warning('Live chat download runs until the livestream ends. '
                                'If you wish to download the video simultaneously, run a separate yt-dlp instance')

        fragment_retries = self.params.get('fragment_retries', 0)
        test = self.params.get('test', False)

        ctx = {
            'filename': filename,
            'live': True,
            'total_frags': None,
        }

        ie = YT_BaseIE(self.ydl)

        # Wall-clock start in milliseconds; live messages are stamped relative to it.
        start_time = int(time.time() * 1000)

        def dl_fragment(url, data=None, headers=None):
            """Download one fragment, layering ``headers`` over the info_dict headers."""
            http_headers = info_dict.get('http_headers', {})
            if headers:
                # copy first so the headers stored in info_dict are not modified
                http_headers = http_headers.copy()
                http_headers.update(headers)
            return self._download_fragment(ctx, url, info_dict, http_headers, data)

        def parse_actions_replay(live_chat_continuation):
            """Write the replay actions of one response and return
            ``(continuation_id, offset, click_tracking_params)``.

            ``offset`` is the ``videoOffsetTimeMsec`` of the last replay action
            seen. When the response carries no replay action, all three values
            are None, which ends the download loop.
            """
            offset = continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                if 'replayChatItemAction' in action:
                    replay_chat_item_action = action['replayChatItemAction']
                    offset = int(replay_chat_item_action['videoOffsetTimeMsec'])
                processed_fragment.extend(
                    json.dumps(action, ensure_ascii=False).encode() + b'\n')
            if offset is not None:
                continuation = try_get(
                    live_chat_continuation,
                    lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict)
                if continuation:
                    continuation_id = continuation.get('continuation')
                    click_tracking_params = continuation.get('clickTrackingParams')
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, offset, click_tracking_params

        def try_refresh_replay_beginning(live_chat_continuation):
            """Handle the first replay response.

            If the header offers the second view option, restart from its
            continuation at offset 0; otherwise parse the response as a
            regular replay fragment.
            """
            # choose the second option that contains the unfiltered live chat replay
            refresh_continuation = try_get(
                live_chat_continuation,
                lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict)
            if refresh_continuation:
                # no data yet but required to call _append_fragment
                self._append_fragment(ctx, b'')
                refresh_continuation_id = refresh_continuation.get('continuation')
                offset = 0
                click_tracking_params = refresh_continuation.get('trackingParams')
                return refresh_continuation_id, offset, click_tracking_params
            return parse_actions_replay(live_chat_continuation)

        # Offset of the most recent timestamped live action; kept across
        # fragments so actions without a timestamp reuse the last known value.
        live_offset = 0

        def parse_actions_live(live_chat_continuation):
            """Write the live actions of one response and return
            ``(continuation_id, live_offset, click_tracking_params)``.

            Sleeps for the ``timeoutMs`` announced by the continuation data,
            when present, before the fragment is appended.
            """
            nonlocal live_offset
            continuation_id = click_tracking_params = None
            processed_fragment = bytearray()
            for action in live_chat_continuation.get('actions', []):
                timestamp = self.parse_live_timestamp(action)
                if timestamp is not None:
                    live_offset = timestamp - start_time
                # compatibility with replay format
                pseudo_action = {
                    'replayChatItemAction': {'actions': [action]},
                    'videoOffsetTimeMsec': str(live_offset),
                    'isLive': True,
                }
                processed_fragment.extend(
                    json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n')
            continuation_data_getters = [
                lambda x: x['continuations'][0]['invalidationContinuationData'],
                lambda x: x['continuations'][0]['timedContinuationData'],
            ]
            continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict)
            if continuation_data:
                continuation_id = continuation_data.get('continuation')
                click_tracking_params = continuation_data.get('clickTrackingParams')
                timeout_ms = int_or_none(continuation_data.get('timeoutMs'))
                if timeout_ms is not None:
                    time.sleep(timeout_ms / 1000)
            self._append_fragment(ctx, processed_fragment)
            return continuation_id, live_offset, click_tracking_params

        def download_and_parse_fragment(url, frag_index, request_data=None, headers=None):
            """Download and parse one fragment, retrying on HTTP errors.

            Returns ``(success, continuation_id, offset, click_tracking_params)``;
            on failure the last three values are None.
            """
            count = 0
            while count <= fragment_retries:
                try:
                    success = dl_fragment(url, request_data, headers)
                    if not success:
                        return False, None, None, None
                    raw_fragment = self._read_fragment(ctx)
                    # The chat page embeds the data in HTML, while the API
                    # answers with bare JSON; try the former, fall back to the latter.
                    try:
                        data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
                    except RegexNotFoundError:
                        data = None
                    if not data:
                        data = json.loads(raw_fragment)
                    live_chat_continuation = try_get(
                        data,
                        lambda x: x['continuationContents']['liveChatContinuation'], dict) or {}
                    if info_dict['protocol'] == 'youtube_live_chat_replay':
                        if frag_index == 1:
                            continuation_id, offset, click_tracking_params = try_refresh_replay_beginning(live_chat_continuation)
                        else:
                            continuation_id, offset, click_tracking_params = parse_actions_replay(live_chat_continuation)
                    elif info_dict['protocol'] == 'youtube_live_chat':
                        continuation_id, offset, click_tracking_params = parse_actions_live(live_chat_continuation)
                    return True, continuation_id, offset, click_tracking_params
                except compat_urllib_error.HTTPError as err:
                    count += 1
                    if count <= fragment_retries:
                        self.report_retry_fragment(err, frag_index, count, fragment_retries)
            if count > fragment_retries:
                self.report_error('giving up after %s fragment retries' % fragment_retries)
                return False, None, None, None

        self._prepare_and_start_frag_download(ctx, info_dict)

        success = dl_fragment(info_dict['url'])
        if not success:
            return False
        raw_fragment = self._read_fragment(ctx)
        try:
            data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace'))
        except RegexNotFoundError:
            return False
        # Restricting the result to str guarantees the concatenations below
        # cannot fail on an unexpected value type.
        continuation_id = try_get(
            data,
            lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation'],
            str)
        # no data yet but required to call _append_fragment
        self._append_fragment(ctx, b'')

        ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace'))

        if not ytcfg:
            return False
        api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY'])
        innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT'])
        if not api_key or not innertube_context:
            return False
        if continuation_id is None:
            # The page carries no live chat continuation, so neither the chat
            # page URL nor the first API request can be built.
            return False
        visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str)
        if info_dict['protocol'] == 'youtube_live_chat_replay':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id
        elif info_dict['protocol'] == 'youtube_live_chat':
            url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key
            chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id
        else:
            # Unknown protocol: there is no endpoint to query.
            return False

        frag_index = offset = 0
        click_tracking_params = None
        while continuation_id is not None:
            frag_index += 1
            request_data = {
                'context': innertube_context,
                'continuation': continuation_id,
            }
            if frag_index > 1:
                # Every fragment after the first is fetched from the API.
                request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))}
                if click_tracking_params:
                    # NOTE: this writes into the shared innertube_context dict,
                    # so the value persists for later requests.
                    request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params}
                headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data)
                headers.update({'content-type': 'application/json'})
                fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n'
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    url, frag_index, fragment_request_data, headers)
            else:
                # The first fragment is the chat page itself.
                success, continuation_id, offset, click_tracking_params = download_and_parse_fragment(
                    chat_page_url, frag_index)
            if not success:
                return False
            if test:
                break

        self._finish_frag_download(ctx, info_dict)
        return True

    @staticmethod
    def parse_live_timestamp(action):
        """Return the timestamp of a live chat ``action``, or None.

        Looks through the known action, item and renderer keys (text, ticker
        and banner variants) for a renderer carrying ``timestampUsec``. None is
        returned as soon as an expected level is missing or is not a dict.
        The value is passed through ``int_or_none(..., 1000)`` — presumably a
        microsecond-to-millisecond scale; confirm against ``int_or_none``.
        """
        action_content = dict_get(
            action,
            ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand'])
        if not isinstance(action_content, dict):
            return None
        item = dict_get(action_content, ['item', 'bannerRenderer'])
        if not isinstance(item, dict):
            return None
        renderer = dict_get(item, [
            # text
            'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
            'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            # ticker
            'liveChatTickerPaidMessageItemRenderer',
            'liveChatTickerSponsorItemRenderer',
            # banner
            'liveChatBannerRenderer',
        ])
        if not isinstance(renderer, dict):
            return None
        # Ticker and banner renderers wrap the actual message renderer.
        parent_item_getters = [
            lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'],
            lambda x: x['contents'],
        ]
        parent_item = try_get(renderer, parent_item_getters, dict)
        if parent_item:
            renderer = dict_get(parent_item, [
                'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer',
                'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer',
            ])
        if not isinstance(renderer, dict):
            return None
        return int_or_none(renderer.get('timestampUsec'), 1000)