"OUTPUT TEMPLATE" for a list of available
keys) to match if the key is present, !key
to check if the key is not present,
- key>NUMBER (like "comment_count > 12", also
+ key>NUMBER (like "view_count > 12", also
works with >=, <, <=, !=, =) to compare
against a number, key = 'LITERAL' (like
"uploader = 'Mike Smith'", also works with
--no-write-playlist-metafiles Do not write playlist metadata when using
--write-info-json, --write-description etc.
--get-comments Retrieve video comments to be placed in the
- .info.json file
+ .info.json file. The comments are fetched
+ even without this option if the extraction
+ is known to be quick
--load-info-json FILE JSON file containing the video information
(created with the "--write-info-json"
option)
- `dislike_count` (numeric): Number of negative ratings of the video
- `repost_count` (numeric): Number of reposts of the video
- `average_rating` (numeric): Average rating given by users, the scale used depends on the webpage
- - `comment_count` (numeric): Number of comments on the video
+ - `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `age_limit` (numeric): Age restriction for the video (years)
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
- `was_live` (boolean): Whether this video was originally a live stream
self.to_stdout(formatSeconds(info_dict['duration']))
print_mandatory('format')
if self.params.get('forcejson', False):
+ self.post_extract(info_dict)
self.to_stdout(json.dumps(info_dict))
def process_info(self, info_dict):
if self._match_entry(info_dict, incomplete=False) is not None:
return
+ self.post_extract(info_dict)
self._num_downloads += 1
info_dict = self.pre_process(info_dict)
raise
else:
if self.params.get('dump_single_json', False):
+ self.post_extract(res)
self.to_stdout(json.dumps(res))
return self._download_retcode
del files_to_move[old_filename]
return files_to_move, infodict
+ @staticmethod
+ def post_extract(info_dict):  # Run any deferred '__post_extractor' callable(s) found in info_dict and merge their results into it in place.
+ def actual_post_extract(info_dict):  # Recursive worker; calls itself for each entry of a playlist/multi_video dict.
+ if info_dict.get('_type') in ('playlist', 'multi_video'):
+ for video_dict in info_dict.get('entries', {}):  # NOTE(review): this iterates 'entries' directly; if it can be a one-shot iterator it would be consumed here -- confirm against callers.
+ actual_post_extract(video_dict)
+ return
+
+ if '__post_extractor' not in info_dict:  # nothing deferred for this dict
+ return
+ post_extractor = info_dict['__post_extractor']
+ if post_extractor:
+ info_dict.update(post_extractor().items())  # the callable is invoked with no arguments; the items of its result are merged into info_dict
+ del info_dict['__post_extractor']  # remove the key so a later call returns early at the membership check above; NOTE(review): indentation is not visible in this excerpt -- confirm whether this sits inside the `if post_extractor:` branch
+ return
+
+ actual_post_extract(info_dict)
+
def pre_process(self, ie_info):
info = dict(ie_info)
for pp in self._pps['beforedl']:
info['uploader'] = self._html_search_meta(
'author', webpage, 'uploader', default=None)
- comments = None
- if self._downloader.params.get('getcomments', False):
- comments = self._get_all_comment_pages(video_id)
-
raw_danmaku = self._get_raw_danmaku(video_id, cid)
raw_tags = self._get_tags(video_id)
top_level_info = {
'raw_danmaku': raw_danmaku,
- 'comments': comments,
- 'comment_count': len(comments) if comments is not None else None,
'tags': tags,
'raw_tags': raw_tags,
}
+ if self._downloader.params.get('getcomments', False):
+ def get_comments():
+ comments = self._get_all_comment_pages(video_id)
+ return {
+ 'comments': comments,
+ 'comment_count': len(comments)
+ }
+
+ top_level_info['__post_extractor'] = get_comments
'''
# Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
players on other sites. Can be True (=always allowed),
False (=never allowed), None (=unknown), or a string
specifying the criteria for embedability (Eg: 'whitelist').
+ __post_extractor: A function to be called just before the metadata is
+ written to either disk, logger or console. The function
+ must return a dict which will be added to the info_dict.
+ This is useful for additional information that is
+ time-consuming to extract. Note that the fields thus
+ extracted will not be available to output template and
+ match_filter. So, only "comments" and "comment_count" are
+ currently allowed to be extracted via this method.
The following fields should only be used when the video belongs to some logical
chapter or section:
# Get comments
# TODO: Refactor and move to separate function
- if get_comments:
+ def extract_comments():
expected_video_comment_count = 0
video_comments = []
+ comment_xsrf = xsrf_token
def find_value(html, key, num_chars=2, separator='"'):
pos_begin = html.find(key) + len(key) + num_chars
self.to_screen('Downloading comments')
while continuations:
continuation = continuations.pop()
- comment_response = get_continuation(continuation, xsrf_token)
+ comment_response = get_continuation(continuation, comment_xsrf)
if not comment_response:
continue
if list(search_dict(comment_response, 'externalErrorMessage')):
continue
# not sure if this actually helps
if 'xsrf_token' in comment_response:
- xsrf_token = comment_response['xsrf_token']
+ comment_xsrf = comment_response['xsrf_token']
item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
if first_continuation:
while reply_continuations:
time.sleep(1)
continuation = reply_continuations.pop()
- replies_data = get_continuation(continuation, xsrf_token, True)
+ replies_data = get_continuation(continuation, comment_xsrf, True)
if not replies_data or 'continuationContents' not in replies_data[1]['response']:
continue
time.sleep(1)
self.to_screen('Total comments downloaded: %d of ~%d' % (len(video_comments), expected_video_comment_count))
- info.update({
+ return {
'comments': video_comments,
'comment_count': expected_video_comment_count
- })
+ }
+
+ if get_comments:
+ info['__post_extractor'] = extract_comments
self.mark_watched(video_id, player_response)
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
'match if the key is present, '
'!key to check if the key is not present, '
- 'key>NUMBER (like "comment_count > 12", also works with '
+ 'key>NUMBER (like "view_count > 12", also works with '
'>=, <, <=, !=, =) to compare against a number, '
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
'to match against a string literal '
filesystem.add_option(
'--get-comments',
action='store_true', dest='getcomments', default=False,
- help='Retrieve video comments to be placed in the .info.json file')
+ help=(
+ 'Retrieve video comments to be placed in the .info.json file. '
+ 'The comments are fetched even without this option if the extraction is known to be quick'))
filesystem.add_option(
'--load-info-json', '--load-info',
dest='load_info_filename', metavar='FILE',