--write-annotations Write video annotations to a
.annotations.xml file
--no-write-annotations Do not write video annotations (default)
+ --get-comments Retrieve video comments to be placed in the
+ .info.json file
--load-info-json FILE JSON file containing the video information
(created with the "--write-info-json"
option)
--remux-video FORMAT Remux the video into another container if
necessary (currently supported: mp4|mkv).
If target container does not support the
- video/audio codec, remuxing will fail
+ video/audio codec, remuxing will fail. You
+ can specify multiple rules; eg.
+ "aac>m4a/mov>mp4/mkv" will remux aac to
+ m4a, mov to mp4 and anything else to mkv.
--recode-video FORMAT Re-encode the video into another format if
re-encoding is necessary (currently
supported: mp4|flv|ogg|webm|mkv|avi)
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video metadata to a .info.json file
+ getcomments: Extract video comments. This will not be written to disk
+ unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break
-
return self.__extract_info(url, ie, download, extra_info, process, info_dict)
-
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
playlist = ie_result.get('title') or ie_result.get('id')
self.to_screen('[download] Downloading playlist: %s' % playlist)
+ if self.params.get('writeinfojson', False):
+ infofn = replace_extension(
+ self.prepare_filepath(self.prepare_filename(ie_result), 'infojson'),
+ 'info.json', ie_result.get('ext'))
+ if self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Playlist description metadata is already present')
+ else:
+ self.to_screen('[info] Writing description playlist metadata as JSON to: ' + infofn)
+ playlist_info = dict(ie_result)
+ playlist_info.pop('entries')
+ try:
+ write_json_file(self.filter_requested_info(playlist_info), infofn)
+ except (OSError, IOError):
+ self.report_error('Cannot write playlist description metadata to JSON file ' + infofn)
+
playlist_results = []
playliststart = self.params.get('playliststart', 1) - 1
except (OSError, IOError):
self.report_error('Cannot write metadata to JSON file ' + infofn)
return
+ info_dict['__infojson_filepath'] = infofn
thumbdir = os.path.dirname(self.prepare_filepath(filename, 'thumbnail'))
for thumbfn in self._write_thumbnails(info_dict, temp_filename):
'updatetime': opts.updatetime,
'writedescription': opts.writedescription,
'writeannotations': opts.writeannotations,
- 'writeinfojson': opts.writeinfojson,
+ 'writeinfojson': opts.writeinfojson or opts.getcomments,
+ 'getcomments': opts.getcomments,
'writethumbnail': opts.writethumbnail,
'write_all_thumbnails': opts.write_all_thumbnails,
'writelink': opts.writelink,
import time
from .common import InfoExtractor
+from .anvato_token_generator import NFLTokenGenerator
from ..aes import aes_encrypt
from ..compat import compat_str
from ..utils import (
'telemundo': 'anvato_mcp_telemundo_web_prod_c5278d51ad46fda4b6ca3d0ea44a7846a054f582'
}
+ _TOKEN_GENERATORS = {
+ 'GXvEgwyJeWem8KCYXfeoHWknwP48Mboj': NFLTokenGenerator,
+ }
+
_API_KEY = '3hwbSuqqT690uxjNYBktSQpa5ZrpYYR0Iofx7NcJHyA'
_ANVP_RE = r'<script[^>]+\bdata-anvp\s*=\s*(["\'])(?P<anvp>(?:(?!\1).)+)\1'
'anvrid': anvrid,
'anvts': server_time,
}
- api['anvstk'] = md5_text('%s|%s|%d|%s' % (
- access_key, anvrid, server_time,
- self._ANVACK_TABLE.get(access_key, self._API_KEY)))
+ if access_key in self._TOKEN_GENERATORS:
+ api['anvstk2'] = self._TOKEN_GENERATORS[access_key].generate(self, access_key, video_id)
+ else:
+ api['anvstk'] = md5_text('%s|%s|%d|%s' % (
+ access_key, anvrid, server_time,
+ self._ANVACK_TABLE.get(access_key, self._API_KEY)))
return self._download_json(
video_data_url, video_id, transform_source=strip_jsonp,
from __future__ import unicode_literals
import hashlib
+import json
import re
-from .common import InfoExtractor
+from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
compat_parse_qs,
compat_urlparse,
(?:
video/[aA][vV]|
anime/(?P<anime_id>\d+)/play\#
- )(?P<id_bv>\d+)|
- video/[bB][vV](?P<id>[^/?#&]+)
+ )(?P<id>\d+)|
+ video/[bB][vV](?P<id_bv>[^/?#&]+)
)
+ (?:/?\?p=(?P<page>\d+))?
'''
_TESTS = [{
- 'url': 'http://www.bilibili.tv/video/av1074402/',
+ 'url': 'http://www.bilibili.com/video/av1074402/',
'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
'info_dict': {
'id': '1074402',
# Tested in BiliBiliBangumiIE
'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
'only_matching': True,
+ }, {
+ # bilibili.tv
+ 'url': 'http://www.bilibili.tv/video/av1074402/',
+ 'only_matching': True,
}, {
'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
'md5': '3f721ad1e75030cc06faf73587cfec57',
url, smuggled_data = unsmuggle_url(url, {})
mobj = re.match(self._VALID_URL, url)
- video_id = mobj.group('id') or mobj.group('id_bv')
+ video_id = mobj.group('id_bv') or mobj.group('id')
+
+ av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
+ video_id = av_id
+
anime_id = mobj.group('anime_id')
+ page_id = mobj.group('page')
webpage = self._download_webpage(url, video_id)
if 'anime/' not in url:
cid = self._search_regex(
+ r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
+ default=None
+ ) or self._search_regex(
r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
default=None
) or compat_parse_qs(self._search_regex(
break
title = self._html_search_regex(
- ('<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
- '(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
- group='title')
+ (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
+ r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
+ group='title') + ('_p' + str(page_id) if page_id is not None else '')
description = self._html_search_meta('description', webpage)
timestamp = unified_timestamp(self._html_search_regex(
r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
# TODO 'view_count' requires deobfuscating Javascript
info = {
- 'id': video_id,
+ 'id': str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
+ 'cid': cid,
'title': title,
'description': description,
'timestamp': timestamp,
'uploader': uploader_mobj.group('name'),
'uploader_id': uploader_mobj.group('id'),
})
+
if not info.get('uploader'):
info['uploader'] = self._html_search_meta(
'author', webpage, 'uploader', default=None)
+ comments = None
+ if self._downloader.params.get('getcomments', False):
+ comments = self._get_all_comment_pages(video_id)
+
+ raw_danmaku = self._get_raw_danmaku(video_id, cid)
+
+ raw_tags = self._get_tags(video_id)
+ tags = list(map(lambda x: x['tag_name'], raw_tags))
+
+ top_level_info = {
+ 'raw_danmaku': raw_danmaku,
+ 'comments': comments,
+ 'comment_count': len(comments) if comments is not None else None,
+ 'tags': tags,
+ 'raw_tags': raw_tags,
+ }
+
+ '''
+ # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
+ # See https://github.com/animelover1984/youtube-dl
+ danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
+ entries[0]['subtitles'] = {
+ 'danmaku': [{
+ 'ext': 'ass',
+ 'data': danmaku
+ }]
+ }
+ '''
+
for entry in entries:
entry.update(info)
if len(entries) == 1:
+ entries[0].update(top_level_info)
return entries[0]
else:
for idx, entry in enumerate(entries):
entry['id'] = '%s_part%d' % (video_id, (idx + 1))
- return {
+ global_info = {
'_type': 'multi_video',
'id': video_id,
+ 'bv_id': bv_id,
'title': title,
'description': description,
'entries': entries,
}
+ global_info.update(info)
+ global_info.update(top_level_info)
+
+ return global_info
+
+ def _get_video_id_set(self, id, is_bv):
+ query = {'bvid': id} if is_bv else {'aid': id}
+ response = self._download_json(
+ "http://api.bilibili.cn/x/web-interface/view",
+ id, query=query,
+ note='Grabbing original ID via API')
+
+ if response['code'] == -400:
+ raise ExtractorError('Video ID does not exist', expected=True, video_id=id)
+ elif response['code'] != 0:
+ raise ExtractorError('Unknown error occurred during API check (code %s)' % response['code'], expected=True, video_id=id)
+ return (response['data']['aid'], response['data']['bvid'])
+
+ # recursive solution to getting every page of comments for the video
+ # we can stop when we reach a page without any comments
+ def _get_all_comment_pages(self, video_id, commentPageNumber=0):
+ comment_url = "https://api.bilibili.com/x/v2/reply?jsonp=jsonp&pn=%s&type=1&oid=%s&sort=2&_=1567227301685" % (commentPageNumber, video_id)
+ json_str = self._download_webpage(
+ comment_url, video_id,
+ note='Extracting comments from page %s' % (commentPageNumber))
+ replies = json.loads(json_str)['data']['replies']
+ if replies is None:
+ return []
+ return self._get_all_children(replies) + self._get_all_comment_pages(video_id, commentPageNumber + 1)
+
+ # extracts all comments in the tree
+ def _get_all_children(self, replies):
+ if replies is None:
+ return []
+
+ ret = []
+ for reply in replies:
+ author = reply['member']['uname']
+ author_id = reply['member']['mid']
+ id = reply['rpid']
+ text = reply['content']['message']
+ timestamp = reply['ctime']
+ parent = reply['parent'] if reply['parent'] != 0 else 'root'
+
+ comment = {
+ "author": author,
+ "author_id": author_id,
+ "id": id,
+ "text": text,
+ "timestamp": timestamp,
+ "parent": parent,
+ }
+ ret.append(comment)
+
+ # from the JSON, the comment structure seems arbitrarily deep, but I could be wrong.
+ # Regardless, this should work.
+ ret += self._get_all_children(reply['replies'])
+
+ return ret
+
+ def _get_raw_danmaku(self, video_id, cid):
+ # This will be useful if I decide to scrape all pages instead of doing them individually
+ # cid_url = "https://www.bilibili.com/widget/getPageList?aid=%s" % (video_id)
+ # cid_str = self._download_webpage(cid_url, video_id, note=False)
+ # cid = json.loads(cid_str)[0]['cid']
+
+ danmaku_url = "https://comment.bilibili.com/%s.xml" % (cid)
+ danmaku = self._download_webpage(danmaku_url, video_id, note='Downloading danmaku comments')
+ return danmaku
+
+ def _get_tags(self, video_id):
+ tags_url = "https://api.bilibili.com/x/tag/archive/tags?aid=%s" % (video_id)
+ tags_json = self._download_json(tags_url, video_id, note='Downloading tags')
+ return tags_json['data']
+
class BiliBiliBangumiIE(InfoExtractor):
_VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'
season_info.get('bangumi_title'), season_info.get('evaluate'))
class BilibiliChannelIE(InfoExtractor):
    """Turns a space.bilibili.com user page into a playlist of that user's videos."""

    # The dot after "space" is escaped so that only the real host name matches.
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)'
    # Only the first page (pn=1) of 25 items (ps=25) is requested; pagination
    # is not implemented yet and still needs a user with many uploads to test.
    _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=1&ps=25&jsonp=jsonp"
    _TEST = {}  # TODO: Add tests

    def _real_extract(self, url):
        """Return a playlist whose entries point at the channel's videos.

        Every item of `data.list.vlist` in the API reply becomes a `url`
        entry that is handed to BiliBiliIE via its `bvid`.
        """
        list_id = self._match_id(url)
        # The channel ID doubles as the display ID in progress messages.
        channel_data = self._download_json(self._API_URL % list_id, list_id)

        entries = [{
            '_type': 'url',
            'ie_key': BiliBiliIE.ie_key(),
            'url': 'https://www.bilibili.com/video/%s' % entry['bvid'],
            'id': entry['bvid'],
        } for entry in channel_data['data']['list']['vlist']]

        return {
            '_type': 'playlist',
            'id': list_id,
            'entries': entries
        }
+
+
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor for Bilibili videos, selected with the "bilisearch" key."""

    IE_DESC = 'Bilibili video search, "bilisearch" keyword'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    MAX_NUMBER_OF_RESULTS = 1000

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query

        Result pages are requested in order (sorted by publication date)
        until `n` entries were collected or a page has no results. Returns
        a playlist dict whose `id` is the query and whose `entries` are at
        most `n` url results for BiliBiliIE.
        """
        entries = []
        page_number = 0
        while True:
            page_number += 1
            # The keyword is passed through `query=` so that it gets
            # URL-encoded; interpolating it into the URL broke searches
            # containing characters such as '&', '+', '%' or '#'.
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                query={
                    'context': '',
                    'page': page_number,
                    'order': 'pubdate',
                    'keyword': query,
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                    'Search_key': query,
                },
                note='Extracting results from page %s' % page_number)
            data = response['data']

            # A missing *or empty* result list means there is nothing more
            # to fetch; continuing on an empty page would loop forever.
            videos = data.get('result') if data else None
            if not videos:
                break

            for video in videos:
                entries.append(
                    self.url_result(video['arcurl'], 'BiliBili', str(video['aid'])))

            # NOTE(review): the second condition compares the size of a
            # single page with MAX_NUMBER_OF_RESULTS; presumably the total
            # was meant - kept as is, confirm the intended limit.
            if len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS:
                break

        return {
            '_type': 'playlist',
            'id': query,
            'entries': entries[:n]
        }
+
+
class BilibiliAudioBaseIE(InfoExtractor):
def _call_api(self, path, sid, query=None):
if not query:
from .bild import BildIE
from .bilibili import (
BiliBiliIE,
+ BiliBiliSearchIE,
BiliBiliBangumiIE,
BilibiliAudioIE,
BilibiliAudioAlbumIE,
BiliBiliPlayerIE,
+ BilibiliChannelIE,
)
from .biobiochiletv import BioBioChileTVIE
from .bitchute import (
default=None
))
- # annotations
- video_annotations = None
- if self._downloader.params.get('writeannotations', False):
+ # get xsrf for annotations or comments
+ get_annotations = self._downloader.params.get('writeannotations', False)
+ get_comments = self._downloader.params.get('getcomments', False)
+ if get_annotations or get_comments:
xsrf_token = None
ytcfg = self._extract_ytcfg(video_id, video_webpage)
if ytcfg:
xsrf_token = self._search_regex(
r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>(?:(?!\2).)+)\2',
video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+
+ # annotations
+ video_annotations = None
+ if get_annotations:
invideo_url = try_get(
player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
if xsrf_token and invideo_url:
chapters = self._extract_chapters(video_webpage, description_original, video_id, video_duration)
+ # Get comments
+ # TODO: Refactor and move to separate function
+ if get_comments:
+ expected_video_comment_count = 0
+ video_comments = []
+
+ def find_value(html, key, num_chars=2, separator='"'):
+ pos_begin = html.find(key) + len(key) + num_chars
+ pos_end = html.find(separator, pos_begin)
+ return html[pos_begin: pos_end]
+
+ def search_dict(partial, key):
+ if isinstance(partial, dict):
+ for k, v in partial.items():
+ if k == key:
+ yield v
+ else:
+ for o in search_dict(v, key):
+ yield o
+ elif isinstance(partial, list):
+ for i in partial:
+ for o in search_dict(i, key):
+ yield o
+
+ try:
+ ncd = next(search_dict(yt_initial_data, 'nextContinuationData'))
+ continuations = [(ncd['continuation'], ncd['clickTrackingParams'])]
+ # Handle videos where comments have been disabled entirely
+ except StopIteration:
+ continuations = []
+
+ def get_continuation(continuation, itct, session_token, replies=False):
+ query = {
+ 'pbj': 1,
+ 'ctoken': continuation,
+ 'continuation': continuation,
+ 'itct': itct,
+ }
+ if replies:
+ query['action_get_comment_replies'] = 1
+ else:
+ query['action_get_comments'] = 1
+
+ while True:
+ content, handle = self._download_webpage_handle(
+ 'https://www.youtube.com/comment_service_ajax',
+ video_id,
+ note=False,
+ expected_status=[413],
+ data=urlencode_postdata({
+ 'session_token': session_token
+ }),
+ query=query,
+ headers={
+ 'Accept': '*/*',
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0',
+ 'X-YouTube-Client-Name': '1',
+ 'X-YouTube-Client-Version': '2.20201202.06.01'
+ }
+ )
+
+ response_code = handle.getcode()
+ if (response_code == 200):
+ return self._parse_json(content, video_id)
+ if (response_code == 413): # Sometimes google makes continuations that are too big to be accepted by themselves. Grade A engineering
+ # self.to_screen(json.dumps(query))
+ # self.to_screen('Google API rate limit detected; waiting 30 seconds before continuing')
+ # time.sleep(30)
+ # continue
+ return None
+ raise ExtractorError('Unexpected HTTP error code: %s' % response_code)
+
+ first_continuation = True
+ while continuations:
+ continuation, itct = continuations.pop()
+ comment_response = get_continuation(continuation, itct, xsrf_token)
+ if not comment_response:
+ continue
+ if list(search_dict(comment_response, 'externalErrorMessage')):
+ raise ExtractorError('Error returned from server: ' + next(search_dict(comment_response, 'externalErrorMessage')))
+
+ item_section = comment_response['response']['continuationContents']['itemSectionContinuation']
+ if first_continuation:
+ expected_video_comment_count = int(item_section['header']['commentsHeaderRenderer']['countText']['runs'][0]['text'].replace(' Comments', '').replace('1 Comment', '1').replace(',', ''))
+ first_continuation = False
+ if 'contents' not in item_section:
+ # continuation returned no comments?
+ # set an empty array as to not break the for loop
+ item_section['contents'] = []
+
+ for meta_comment in item_section['contents']:
+ comment = meta_comment['commentThreadRenderer']['comment']['commentRenderer']
+ video_comments.append({
+ 'id': comment['commentId'],
+ 'text': ''.join([c['text'] for c in comment['contentText']['runs']]),
+ 'time_text': comment['publishedTimeText']['runs'][0]['text'],
+ 'author': comment.get('authorText', {}).get('simpleText', ''),
+ 'votes': comment.get('voteCount', {}).get('simpleText', '0'),
+ 'author_thumbnail': comment['authorThumbnail']['thumbnails'][-1]['url'],
+ 'parent': 'root'
+ })
+ if 'replies' not in meta_comment['commentThreadRenderer']:
+ continue
+
+ reply_continuation = meta_comment['commentThreadRenderer']['replies']['commentRepliesRenderer']['continuations'][0]['nextContinuationData']
+ continuation = reply_continuation['continuation']
+ itct = reply_continuation['clickTrackingParams']
+ while True:
+ time.sleep(1)
+ replies_data = get_continuation(continuation, itct, xsrf_token, True)
+ if not replies_data or 'continuationContents' not in replies_data[1]['response']:
+ break
+
+ if self._downloader.params.get('verbose', False):
+ self.to_screen('[debug] Comments downloaded (chain %s) %s of ~%s' % (comment['commentId'], len(video_comments), expected_video_comment_count))
+ reply_comment_meta = replies_data[1]['response']['continuationContents']['commentRepliesContinuation']
+ for reply_meta in replies_data[1]['response']['continuationContents']['commentRepliesContinuation']['contents']:
+ reply_comment = reply_meta['commentRenderer']
+ video_comments.append({
+ 'id': reply_comment['commentId'],
+ 'text': ''.join([c['text'] for c in reply_comment['contentText']['runs']]),
+ 'time_text': reply_comment['publishedTimeText']['runs'][0]['text'],
+ 'author': reply_comment.get('authorText', {}).get('simpleText', ''),
+ 'votes': reply_comment.get('voteCount', {}).get('simpleText', '0'),
+ 'author_thumbnail': reply_comment['authorThumbnail']['thumbnails'][-1]['url'],
+ 'parent': comment['commentId']
+ })
+ if 'continuations' not in reply_comment_meta or len(reply_comment_meta['continuations']) == 0:
+ break
+
+ continuation = reply_comment_meta['continuations'][0]['nextContinuationData']['continuation']
+ itct = reply_comment_meta['continuations'][0]['nextContinuationData']['clickTrackingParams']
+
+ self.to_screen('Comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
+
+ if 'continuations' in item_section:
+ new_continuations = [
+ (ncd['nextContinuationData']['continuation'], ncd['nextContinuationData']['clickTrackingParams'])
+ for ncd in item_section['continuations']]
+ continuations += new_continuations
+ time.sleep(1)
+
+ self.to_screen('Total comments downloaded %s of ~%s' % (len(video_comments), expected_video_comment_count))
+ else:
+ expected_video_comment_count = None
+ video_comments = None
+
# Look for the DASH manifest
if self._downloader.params.get('youtube_include_dash_manifest', True):
dash_mpd_fatal = True
'release_year': release_year,
'subscriber_count': subscriber_count,
'playable_in_embed': playable_in_embed,
+ 'comments': video_comments,
+ 'comment_count': expected_video_comment_count,
}
'--no-write-annotations',
action='store_false', dest='writeannotations',
help='Do not write video annotations (default)')
+ filesystem.add_option(
+ '--get-comments',
+ action='store_true', dest='getcomments', default=False,
+ help='Retrieve video comments to be placed in the .info.json file')
filesystem.add_option(
'--load-info-json', '--load-info',
dest='load_info_filename', metavar='FILE',
metavar='FORMAT', dest='remuxvideo', default=None,
help=(
'Remux the video into another container if necessary (currently supported: mp4|mkv). '
- 'If target container does not support the video/audio codec, remuxing will fail'))
+ 'If target container does not support the video/audio codec, remuxing will fail. '
+ 'You can specify multiple rules; eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 '
+ 'and anything else to mkv.'))
postproc.add_option(
'--recode-video',
metavar='FORMAT', dest='recodevideo', default=None,
import os
import subprocess
+import struct
+import re
+import base64
+
+try:
+ import mutagen
+ _has_mutagen = True
+except ImportError:
+ _has_mutagen = False
from .ffmpeg import FFmpegPostProcessor
check_executable,
encodeArgument,
encodeFilename,
+ error_to_compat_str,
PostProcessingError,
prepend_extension,
+ process_communicate_or_kill,
replace_extension,
shell_quote,
- process_communicate_or_kill,
)
# Rename back to unescaped for further processing
os.rename(encodeFilename(escaped_thumbnail_jpg_filename), encodeFilename(thumbnail_jpg_filename))
thumbnail_filename = thumbnail_jpg_filename
+ thumbnail_ext = 'jpg'
success = True
if info['ext'] == 'mp3':
self.to_screen('Adding thumbnail to "%s"' % filename)
self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
- elif info['ext'] == 'mkv':
- options = [
- '-c', 'copy', '-map', '0', '-dn', '-attach', thumbnail_filename,
- '-metadata:s:t', 'mimetype=image/jpeg', '-metadata:s:t', 'filename=cover.jpg']
-
- self.to_screen('Adding thumbnail to "%s"' % filename)
- self.run_ffmpeg_multiple_files([filename], temp_filename, options)
-
- elif info['ext'] in ['m4a', 'mp4']:
- if not check_executable('AtomicParsley', ['-v']):
- raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
+ elif info['ext'] in ['mkv', 'mka']:
+ options = ['-c', 'copy', '-map', '0', '-dn']
- cmd = [encodeFilename('AtomicParsley', True),
- encodeFilename(filename, True),
- encodeArgument('--artwork'),
- encodeFilename(thumbnail_filename, True),
- encodeArgument('-o'),
- encodeFilename(temp_filename, True)]
- cmd += [encodeArgument(o) for o in self._configuration_args(exe='AtomicParsley')]
+ mimetype = 'image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')
+ old_stream, new_stream = self.get_stream_number(
+ filename, ('tags', 'mimetype'), mimetype)
+ if old_stream is not None:
+ options.extend(['-map', '-0:%d' % old_stream])
+ new_stream -= 1
+ options.extend([
+ '-attach', thumbnail_filename,
+ '-metadata:s:%d' % new_stream, 'mimetype=%s' % mimetype,
+ '-metadata:s:%d' % new_stream, 'filename=cover.%s' % thumbnail_ext])
self.to_screen('Adding thumbnail to "%s"' % filename)
- self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
-
- p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- stdout, stderr = process_communicate_or_kill(p)
-
- if p.returncode != 0:
- msg = stderr.decode('utf-8', 'replace').strip()
- raise EmbedThumbnailPPError(msg)
- # for formats that don't support thumbnails (like 3gp) AtomicParsley
- # won't create to the temporary file
- if b'No changes' in stdout:
- self.report_warning('The file format doesn\'t support embedding a thumbnail')
- success = False
+ self.run_ffmpeg(filename, temp_filename, options)
+
+ elif info['ext'] in ['m4a', 'mp4', 'mov']:
+ try:
+ options = ['-c', 'copy', '-map', '0', '-dn', '-map', '1']
+
+ old_stream, new_stream = self.get_stream_number(
+ filename, ('disposition', 'attached_pic'), 1)
+ if old_stream is not None:
+ options.extend(['-map', '-0:%d' % old_stream])
+ new_stream -= 1
+ options.extend(['-disposition:%s' % new_stream, 'attached_pic'])
+
+ self.to_screen('Adding thumbnail to "%s"' % filename)
+ self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
+
+ except PostProcessingError as err:
+ self.report_warning('unable to embed using ffprobe & ffmpeg; %s' % error_to_compat_str(err))
+ if not check_executable('AtomicParsley', ['-v']):
+ raise EmbedThumbnailPPError('AtomicParsley was not found. Please install.')
+
+ cmd = [encodeFilename('AtomicParsley', True),
+ encodeFilename(filename, True),
+ encodeArgument('--artwork'),
+ encodeFilename(thumbnail_filename, True),
+ encodeArgument('-o'),
+ encodeFilename(temp_filename, True)]
+ cmd += [encodeArgument(o) for o in self._configuration_args(exe='AtomicParsley')]
+
+ self.to_screen('Adding thumbnail to "%s"' % filename)
+ self.write_debug('AtomicParsley command line: %s' % shell_quote(cmd))
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ stdout, stderr = process_communicate_or_kill(p)
+ if p.returncode != 0:
+ msg = stderr.decode('utf-8', 'replace').strip()
+ raise EmbedThumbnailPPError(msg)
+ # for formats that don't support thumbnails (like 3gp) AtomicParsley
+ # won't create to the temporary file
+ if b'No changes' in stdout:
+ self.report_warning('The file format doesn\'t support embedding a thumbnail')
+ success = False
+
+ elif info['ext'] in ['ogg', 'opus']:
+ if not _has_mutagen:
+ raise EmbedThumbnailPPError('module mutagen was not found. Please install.')
+ size_regex = r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]'
+ size_result = self.run_ffmpeg_multiple_files([thumbnail_filename], '', ['-hide_banner'])
+ mobj = re.search(size_regex, size_result)
+ width, height = int(mobj.group('w')), int(mobj.group('h'))
+ mimetype = ('image/%s' % ('png' if thumbnail_ext == 'png' else 'jpeg')).encode('ascii')
+
+ # https://xiph.org/flac/format.html#metadata_block_picture
+ data = bytearray()
+ data += struct.pack('>II', 3, len(mimetype))
+ data += mimetype
+ data += struct.pack('>IIIIII', 0, width, height, 8, 0, os.stat(thumbnail_filename).st_size) # 32 if png else 24
+
+ fin = open(thumbnail_filename, "rb")
+ data += fin.read()
+ fin.close()
+
+ temp_filename = filename
+ f = mutagen.File(temp_filename)
+ f.tags['METADATA_BLOCK_PICTURE'] = base64.b64encode(data).decode('ascii')
+ f.save()
else:
- raise EmbedThumbnailPPError('Only mp3, mkv, m4a and mp4 are supported for thumbnail embedding for now.')
+ raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus, m4a/mp4/mov')
- if success:
+ if success and temp_filename != filename:
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
-
files_to_delete = [] if self._already_have_thumbnail else [thumbnail_filename]
return files_to_delete, info
import subprocess
import time
import re
+import json
from .common import AudioConversionError, PostProcessor
subtitles_filename,
dfxp2srt,
ISO639Utils,
- replace_extension,
process_communicate_or_kill,
+ replace_extension,
+ traverse_dict,
)
return mobj.group(1)
return None
def get_metadata_object(self, path, opts=None):
    """Run ffprobe on `path` and return its JSON report as a Python object.

    ffprobe is invoked with -show_format and -show_streams and JSON output;
    `opts` is an optional list of extra, already encoded command line
    arguments placed before the file name.

    Raises PostProcessingError when the configured probe program is not
    ffprobe, or when its output cannot be parsed as JSON.
    """
    if self.probe_basename != 'ffprobe':
        if self.probe_available:
            self.report_warning('Only ffprobe is supported for metadata extraction')
        raise PostProcessingError('ffprobe not found. Please install.')
    self.check_version()

    cmd = [
        encodeFilename(self.probe_executable, True),
        encodeArgument('-hide_banner'),
        encodeArgument('-show_format'),
        encodeArgument('-show_streams'),
        encodeArgument('-print_format'),
        encodeArgument('json'),
    ]
    # `opts` defaults to None rather than a shared mutable list.
    cmd.extend(opts or [])
    cmd.append(encodeFilename(self._ffmpeg_filename_argument(path), True))

    if self._downloader.params.get('verbose', False):
        self._downloader.to_screen('[debug] ffprobe command line: %s' % shell_quote(cmd))
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    # Same helper as the other subprocess calls in this file use.
    stdout, stderr = process_communicate_or_kill(p)
    try:
        return json.loads(stdout.decode('utf-8', 'replace'))
    except ValueError:
        # Surface unusable probe output as the error type that the
        # postprocessors calling this method already handle.
        raise PostProcessingError('Unable to parse ffprobe output as JSON')
+
def get_stream_number(self, path, keys, value):
    """Find the first stream of `path` whose nested field `keys` equals `value`.

    The streams come from `get_metadata_object`; `keys` is looked up
    case-insensitively inside each stream.

    Returns a tuple (index of the first matching stream or None, total
    number of streams).
    """
    streams = self.get_metadata_object(path)['streams']
    match_index = None
    for index, stream in enumerate(streams):
        if traverse_dict(stream, keys, casesense=False) == value:
            match_index = index
            break
    return match_index, len(streams)
+
def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
self.check_version()
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
stdout, stderr = process_communicate_or_kill(p)
if p.returncode != 0:
- stderr = stderr.decode('utf-8', 'replace')
- msg = stderr.strip().split('\n')[-1]
- raise FFmpegPostProcessorError(msg)
+ stderr = stderr.decode('utf-8', 'replace').strip()
+ if self._downloader.params.get('verbose', False):
+ self.report_error(stderr)
+ raise FFmpegPostProcessorError(stderr.split('\n')[-1])
self.try_utime(out_path, oldest_mtime, oldest_mtime)
+ return stderr.decode('utf-8', 'replace')
def run_ffmpeg(self, path, out_path, opts):
    """Run ffmpeg on the single input `path`, writing to `out_path`.

    Thin wrapper around `run_ffmpeg_multiple_files`; whatever that method
    returns is passed back to the caller instead of being discarded.
    """
    return self.run_ffmpeg_multiple_files([path], out_path, opts)
# interprets that as a protocol) or can start with '-' (-- is broken in
# ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
# Also leave '-' intact in order not to break streaming to stdout.
+ if fn.startswith(('http://', 'https://')):
+ return fn
return 'file:' + fn if fn != '-' else fn
class FFmpegVideoRemuxerPP(FFmpegPostProcessor):
    """Remuxes a downloaded file into another container without re-encoding.

    `preferedformat` is a '/'-separated list of rules. A rule is either
    "source>target" (applies only to files with extension `source`) or a
    bare "target" (applies to any file). The first applicable rule wins,
    e.g. "aac>m4a/mov>mp4/mkv".
    """

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoRemuxerPP, self).__init__(downloader)
        # The parameter defaults to None; treat that as "no rules" instead
        # of failing on None.lower().
        self._preferedformats = (preferedformat or '').lower().split('/')

    def run(self, information):
        """Remux `information['filepath']` according to the configured rules.

        Returns ([], information) untouched when no rule applies or the
        file already has the target extension. Otherwise the file is
        remuxed, `filepath`, `format` and `ext` are updated, and the
        original path is returned as the file to delete.
        """
        path = information['filepath']
        sourceext, targetext = information['ext'].lower(), None
        for pair in self._preferedformats:
            kv = pair.split('>')
            # A rule without '>' matches every source extension.
            if len(kv) == 1 or kv[0].strip() == sourceext:
                targetext = kv[-1].strip()
                break

        _skip_msg = (
            'could not find a mapping for %s' if not targetext
            else 'already is in target format %s' if sourceext == targetext
            else None)
        if _skip_msg:
            self.to_screen('Not remuxing media file %s - %s' % (path, _skip_msg % sourceext))
            return [], information

        options = ['-c', 'copy', '-map', '0', '-dn']
        if targetext in ['mp4', 'm4a', 'mov']:
            options.extend(['-movflags', '+faststart'])
        # Swap the extension for the target one.
        prefix, sep, _ = path.rpartition('.')
        outpath = prefix + sep + targetext
        self.to_screen('Remuxing video from %s to %s; Destination: %s' % (sourceext, targetext, outpath))
        self.run_ffmpeg(path, outpath, options)
        information['filepath'] = outpath
        information['format'] = targetext
        information['ext'] = targetext
        return [path], information
sub_langs = []
sub_filenames = []
webm_vtt_warn = False
+ mp4_ass_warn = False
for lang, sub_info in subtitles.items():
sub_ext = sub_info['ext']
if sub_ext == 'json':
- self.to_screen('JSON subtitles cannot be embedded')
+ self.report_warning('JSON subtitles cannot be embedded')
elif ext != 'webm' or ext == 'webm' and sub_ext == 'vtt':
sub_langs.append(lang)
sub_filenames.append(subtitles_filename(filename, lang, sub_ext, ext))
else:
if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt':
webm_vtt_warn = True
- self.to_screen('Only WebVTT subtitles can be embedded in webm files')
+ self.report_warning('Only WebVTT subtitles can be embedded in webm files')
+ if not mp4_ass_warn and ext == 'mp4' and sub_ext == 'ass':
+ mp4_ass_warn = True
+ self.report_warning('ASS subtitles cannot be properly embedded in mp4 files; expect issues')
if not sub_langs:
return [], information
opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
temp_filename = prepend_extension(filename, 'temp')
- self.to_screen('Embedding subtitles in \'%s\'' % filename)
+ self.to_screen('Embedding subtitles in "%s"' % filename)
self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
os.remove(encodeFilename(filename))
os.rename(encodeFilename(temp_filename), encodeFilename(filename))
# 1. https://kdenlive.org/en/project/adding-meta-data-to-mp4-video/
# 2. https://wiki.multimedia.cx/index.php/FFmpeg_Metadata
# 3. https://kodi.wiki/view/Video_file_tagging
- # 4. http://atomicparsley.sourceforge.net/mpeg-4files.html
add('title', ('track', 'title'))
add('date', 'upload_date')
in_filenames.append(metadata_filename)
options.extend(['-map_metadata', '1'])
+ if '__infojson_filepath' in info and info['ext'] in ('mkv', 'mka'):
+ old_stream, new_stream = self.get_stream_number(
+ filename, ('tags', 'mimetype'), 'application/json')
+ if old_stream is not None:
+ options.extend(['-map', '-0:%d' % old_stream])
+ new_stream -= 1
+
+ options.extend([
+ '-attach', info['__infojson_filepath'],
+ '-metadata:s:%d' % new_stream, 'mimetype=application/json'
+ ])
+
self.to_screen('Adding metadata to \'%s\'' % filename)
self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
if chapters:
if plugin_info[0] is not None:
plugin_info[0].close()
return classes
+
+
def traverse_dict(dictn, keys, casesense=True):
    """Follow the key path `keys` through nested dicts and return what it leads to.

    dictn     -- the outermost dict; anything that is not a dict yields None
    keys      -- sequence of keys, applied one level at a time
    casesense -- when False, keys are compared case-insensitively at every
                 level (keys without a `lower` method are compared as is)

    Returns the value at the end of the path, or None as soon as a key is
    missing or an intermediate value is not a dict. An empty `keys`
    returns `dictn` itself.
    """
    if not isinstance(dictn, dict):
        return None

    def fold(key):
        # Non-string keys (e.g. ints) have no lower(); leave them untouched.
        return key.lower() if hasattr(key, 'lower') else key

    value = dictn
    for key in keys:
        if not isinstance(value, dict):
            return None
        if not casesense:
            value = {fold(k): v for k, v in value.items()}
            key = fold(key)
        value = value.get(key)
    return value