compat_urllib_parse,
compat_urllib_parse_urlencode,
compat_urllib_parse_urlparse,
+ compat_urllib_parse_urlunparse,
+ compat_urllib_parse_quote,
+ compat_urllib_parse_quote_plus,
compat_urllib_parse_unquote_plus,
compat_urllib_request,
compat_urlparse,
r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
+def process_communicate_or_kill(p, *args, **kwargs):
+ try:
+ return p.communicate(*args, **kwargs)
+ except BaseException: # Including KeyboardInterrupt
+ p.kill()
+ p.wait()
+ raise
+
+
def get_subprocess_encoding():
if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
# For subprocess calls, encode with locale encoding
return optval
-def formatSeconds(secs):
+def formatSeconds(secs, delim=':'):
if secs > 3600:
- return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
+ return '%d%s%02d%s%02d' % (secs // 3600, delim, (secs % 3600) // 60, delim, secs % 60)
elif secs > 60:
- return '%d:%02d' % (secs // 60, secs % 60)
+ return '%d%s%02d' % (secs // 60, delim, secs % 60)
else:
return '%d' % secs
if ytdl_is_updateable():
update_cmd = 'type youtube-dlc -U to update'
else:
- update_cmd = 'see https://github.com/blackjack4494/yt-dlc on how to update'
- msg = '; please report this issue on https://github.com/blackjack4494/yt-dlc .'
+ update_cmd = 'see https://github.com/pukkandan/yt-dlp on how to update'
+ msg = '; please report this issue on https://github.com/pukkandan/yt-dlp .'
msg += ' Make sure you are using the latest version; %s.' % update_cmd
msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
return msg
self.msg = msg
+class ExistingVideoReached(YoutubeDLError):
+ """ A video that already exists has been reached. """
+ pass
+
+
+class RejectedVideoReached(YoutubeDLError):
+ """ A rejected video has been reached. """
+ pass
+
+
class MaxDownloadsReached(YoutubeDLError):
""" --max-downloads limit has been reached. """
pass
if not url or not isinstance(url, compat_str):
return None
url = url.strip()
- return url if re.match(r'^(?:[a-zA-Z][\da-zA-Z.+-]*:)?//', url) else None
+ return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
def parse_duration(s):
""" Checks if the given binary is installed somewhere in PATH, and returns its name.
args can be a list of arguments for a short output (like -version) """
try:
- subprocess.Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
+ process_communicate_or_kill(subprocess.Popen(
+ [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
except OSError:
return False
return exe
# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
# SIGTTOU if youtube-dlc is run in the background.
# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
- out, _ = subprocess.Popen(
+ out, _ = process_communicate_or_kill(subprocess.Popen(
[encodeArgument(exe)] + args,
stdin=subprocess.PIPE,
- stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate()
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
except OSError:
return False
if isinstance(out, bytes): # Python 2.x
def fixup(url):
if not isinstance(url, compat_str):
url = url.decode('utf-8', 'replace')
- BOM_UTF8 = '\xef\xbb\xbf'
- if url.startswith(BOM_UTF8):
- url = url[len(BOM_UTF8):]
- url = url.strip()
- if url.startswith(('#', ';', ']')):
+ BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
+ for bom in BOM_UTF8:
+ if url.startswith(bom):
+ url = url[len(bom):]
+ url = url.lstrip()
+ if not url or url.startswith(('#', ';', ']')):
return False
- return url
+ # "#" cannot be stripped out since it is part of the URI
+ # However, it can be safely stripped out if following a whitespace
+ return re.split(r'\s#', url, 1)[0].rstrip()
with contextlib.closing(batch_fd) as fd:
return [url for url in map(fixup, fd) if url]
return q
-DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
+DEFAULT_OUTTMPL = '%(title)s [%(id)s].%(ext)s'
def limit_length(s, length):
def ytdl_is_updateable():
""" Returns if youtube-dlc can be updated with -U """
+ return False
+
from zipimport import zipimporter
return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
return compat_urllib_parse_urlparse(url).scheme
-def render_table(header_row, data):
+def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
""" Render a list of rows, each as a list of values """
+
+ def get_max_lens(table):
+ return [max(len(compat_str(v)) for v in col) for col in zip(*table)]
+
+ def filter_using_list(row, filterArray):
+ return [col for (take, col) in zip(filterArray, row) if take]
+
+ if hideEmpty:
+ max_lens = get_max_lens(data)
+ header_row = filter_using_list(header_row, max_lens)
+ data = [filter_using_list(row, max_lens) for row in data]
+
table = [header_row] + data
- max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
- format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
+ max_lens = get_max_lens(table)
+ if delim:
+ table = [header_row] + [['-' * ml for ml in max_lens]] + data
+ format_str = ' '.join('%-' + compat_str(ml + extraGap) + 's' for ml in max_lens[:-1]) + ' %s'
return '\n'.join(format_str % tuple(row) for row in table)
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
except EnvironmentError as e:
raise XAttrMetadataError(e.errno, e.strerror)
- stdout, stderr = p.communicate()
+ stdout, stderr = process_communicate_or_kill(p)
stderr = stderr.decode('utf-8', 'replace')
if p.returncode != 0:
raise XAttrMetadataError(p.returncode, stderr)
month_field: str(random_date.month),
day_field: str(random_date.day),
}
+
+
+# Templates for internet shortcut files, which are plain text files.
+DOT_URL_LINK_TEMPLATE = '''
+[InternetShortcut]
+URL=%(url)s
+'''.lstrip()
+
+DOT_WEBLOC_LINK_TEMPLATE = '''
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+\t<key>URL</key>
+\t<string>%(url)s</string>
+</dict>
+</plist>
+'''.lstrip()
+
+DOT_DESKTOP_LINK_TEMPLATE = '''
+[Desktop Entry]
+Encoding=UTF-8
+Name=%(filename)s
+Type=Link
+URL=%(url)s
+Icon=text-html
+'''.lstrip()
+
+
+def iri_to_uri(iri):
+ """
+ Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
+
+ The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
+ """
+
+ iri_parts = compat_urllib_parse_urlparse(iri)
+
+ if '[' in iri_parts.netloc:
+ raise ValueError('IPv6 URIs are not, yet, supported.')
+ # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
+
+ # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
+
+ net_location = ''
+ if iri_parts.username:
+ net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
+ if iri_parts.password is not None:
+ net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
+ net_location += '@'
+
+ net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
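+ # The 'idna' encoding produces ASCII text.
+ # NOTE(review): the port check below omits an explicit ":80" for every scheme, not only http - confirm this is intended.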
+ if iri_parts.port is not None and iri_parts.port != 80:
+ net_location += ':' + str(iri_parts.port)
+
+ return compat_urllib_parse_urlunparse(
+ (iri_parts.scheme,
+ net_location,
+
+ compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
+
+ # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
+ compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
+
+ # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
+ compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
+
+ compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
+
+ # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
+
+
+def to_high_limit_path(path):
+ if sys.platform in ['win32', 'cygwin']:
+ # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
+ return r'\\?\ '.rstrip() + os.path.abspath(path)
+
+ return path
+
+
+def format_field(obj, field, template='%s', ignore=(None, ''), default='', func=None):
+ val = obj.get(field, default)
+ if func and val not in ignore:
+ val = func(val)
+ return template % val if val not in ignore else default
+
+
+def clean_podcast_url(url):
+ return re.sub(r'''(?x)
+ (?:
+ (?:
+ chtbl\.com/track|
+ media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
+ play\.podtrac\.com
+ )/[^/]+|
+ (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
+ flex\.acast\.com|
+ pd(?:
+ cn\.co| # https://podcorn.com/analytics-prefix/
+ st\.fm # https://podsights.com/docs/
+ )/e
+ )/''', '', url)