Add --write-*-link by h-h-h-h

[yt-dlp.git] / youtube_dlc / utils.py
diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py

index 54a4ea2aaca8c0dedd6594eaf95f47e3819830a2..d814eb2ac8b5e0ed56976ffda1e2c7f0b3453333 100644 (file)
--- a/youtube_dlc/utils.py
+++ b/youtube_dlc/utils.py
@@ -60,6 +60,9 @@
      compat_urllib_parse,
      compat_urllib_parse_urlencode,
      compat_urllib_parse_urlparse,
+    compat_urllib_parse_urlunparse,
+    compat_urllib_parse_quote,
+    compat_urllib_parse_quote_plus,
      compat_urllib_parse_unquote_plus,
      compat_urllib_request,
      compat_urlparse,
@@ -2320,8 +2323,8 @@ def bug_reports_message():
      if ytdl_is_updateable():
          update_cmd = 'type  youtube-dlc -U  to update'
      else:
-        update_cmd = 'see  https://yt-dl.org/update  on how to update'
-    msg = '; please report this issue on https://yt-dl.org/bug .'
+        update_cmd = 'see  https://github.com/blackjack4494/yt-dlc  on how to update'
+    msg = '; please report this issue on https://github.com/blackjack4494/yt-dlc .'
      msg += ' Make sure you are using the latest version; %s.' % update_cmd
      msg += ' Be sure to call youtube-dlc with the --verbose flag and include its complete output.'
      return msg
@@ -2460,7 +2463,7 @@ def __init__(self, code=None, msg='Unknown error'):
  
          # Parsing code and msg
          if (self.code in (errno.ENOSPC, errno.EDQUOT)
-                or 'No space left' in self.msg or 'Disk quota excedded' in self.msg):
+                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
              self.reason = 'NO_SPACE'
          elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
              self.reason = 'VALUE_TOO_LONG'
@@ -4085,7 +4088,7 @@ def fix_kv(m):
          v = m.group(0)
          if v in ('true', 'false', 'null'):
              return v
-        elif v.startswith('/*') or v.startswith('//') or v == ',':
+        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
              return ""
  
          if v[0] in ("'", '"'):
@@ -4095,12 +4098,12 @@ def fix_kv(m):
                  '\\\n': '',
                  '\\x': '\\u00',
              }.get(m.group(0), m.group(0)), v[1:-1])
-
-        for regex, base in INTEGER_TABLE:
-            im = re.match(regex, v)
-            if im:
-                i = int(im.group(1), base)
-                return '"%d":' % i if v.endswith(':') else '%d' % i
+        else:
+            for regex, base in INTEGER_TABLE:
+                im = re.match(regex, v)
+                if im:
+                    i = int(im.group(1), base)
+                    return '"%d":' % i if v.endswith(':') else '%d' % i
  
          return '"%s"' % v
  
@@ -4110,7 +4113,8 @@ def fix_kv(m):
          {comment}|,(?={skip}[\]}}])|
          (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
          \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
-        [0-9]+(?={skip}:)
+        [0-9]+(?={skip}:)|
+        !+
          '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
  
  
@@ -4214,10 +4218,10 @@ def parse_codecs(codecs_str):
      # http://tools.ietf.org/html/rfc6381
      if not codecs_str:
          return {}
-    splited_codecs = list(filter(None, map(
+    split_codecs = list(filter(None, map(
          lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
      vcodec, acodec = None, None
-    for full_codec in splited_codecs:
+    for full_codec in split_codecs:
          codec = full_codec.split('.')[0]
          if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
              if not vcodec:
@@ -4228,10 +4232,10 @@ def parse_codecs(codecs_str):
          else:
              write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
      if not vcodec and not acodec:
-        if len(splited_codecs) == 2:
+        if len(split_codecs) == 2:
              return {
-                'vcodec': splited_codecs[0],
-                'acodec': splited_codecs[1],
+                'vcodec': split_codecs[0],
+                'acodec': split_codecs[1],
              }
      else:
          return {
@@ -5470,7 +5474,7 @@ def encode_base_n(num, n, table=None):
  
  def decode_packed_codes(code):
      mobj = re.search(PACKED_CODES_RE, code)
-    obfucasted_code, base, count, symbols = mobj.groups()
+    obfuscated_code, base, count, symbols = mobj.groups()
      base = int(base)
      count = int(count)
      symbols = symbols.split('|')
@@ -5483,7 +5487,7 @@ def decode_packed_codes(code):
  
      return re.sub(
          r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
-        obfucasted_code)
+        obfuscated_code)
  
  
  def caesar(s, alphabet, shift):
@@ -5713,3 +5717,81 @@ def random_birthday(year_field, month_field, day_field):
          month_field: str(random_date.month),
          day_field: str(random_date.day),
      }
+
+# Templates for internet shortcut files, which are plain text files.
+# Each template uses %-style named placeholders; the keys it refers to are
+# listed next to it.
+
+# Template for `.url` files. Keys: `url`.
+DOT_URL_LINK_TEMPLATE = (
+    '[InternetShortcut]\n'
+    'URL=%(url)s\n'
+)
+
+# Template for `.webloc` files (an XML property list). Keys: `url`.
+DOT_WEBLOC_LINK_TEMPLATE = (
+    '<?xml version="1.0" encoding="UTF-8"?>\n'
+    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
+    '<plist version="1.0">\n'
+    '<dict>\n'
+    '\t<key>URL</key>\n'
+    '\t<string>%(url)s</string>\n'
+    '</dict>\n'
+    '</plist>\n'
+)
+
+# Template for `.desktop` files. Keys: `filename`, `url`.
+DOT_DESKTOP_LINK_TEMPLATE = (
+    '[Desktop Entry]\n'
+    'Encoding=UTF-8\n'
+    'Name=%(filename)s\n'
+    'Type=Link\n'
+    'URL=%(url)s\n'
+    'Icon=text-html\n'
+)
+
+
+def iri_to_uri(iri):
+    """
+    Convert an IRI (Internationalized Resource Identifier, allowing Unicode
+    characters) to a URI (Uniform Resource Identifier, ASCII-only).
+
+    The function doesn't add an additional layer of escaping; e.g., it doesn't
+    escape `%3C` as `%253C`. Instead, it percent-escapes characters with an
+    underlying UTF-8 encoding *besides* those already escaped, leaving an
+    already valid URI intact.
+
+    The hostname is converted with the 'idna' codec (Punycode). An explicit
+    port is kept, except for port 80 on the `http` scheme, where it is
+    redundant. IRIs without a host (e.g. `file:///...`) are supported.
+
+    @param iri  The IRI as a text string.
+    @returns    The equivalent ASCII-only URI as a text string.
+    @raises ValueError  If the network location contains an IPv6 literal.
+    """
+
+    # Unbalanced brackets are already rejected with a ValueError by the URL parser.
+    iri_parts = compat_urllib_parse_urlparse(iri)
+
+    if '[' in iri_parts.netloc:
+        raise ValueError('IPv6 URIs are not, yet, supported.')
+
+    # The `safe` values below contain the characters that should not be
+    # percent-encoded. Everything else but letters, digits and '_.-' will be
+    # percent-encoded with an underlying UTF-8 encoding. '%' is always safe,
+    # so everything already percent-encoded is left as is.
+    # Source for the `safe` values: https://url.spec.whatwg.org/#percent-encoded-bytes.
+    userinfo_safe = r"!$%&'()*+,~"
+    path_safe = r"!$%&'()*+,/:;=@|~"
+    query_safe = r"!$%&'()*+,/:;=?@{|}~"
+    fragment_safe = r"!#$%&'()*+,/:;=?@{|}~"
+
+    net_location = ''
+    if iri_parts.username:
+        net_location += compat_urllib_parse_quote(iri_parts.username, safe=userinfo_safe)
+        if iri_parts.password is not None:
+            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=userinfo_safe)
+        net_location += '@'
+
+    # `hostname` is None when the IRI has no host part (e.g. `file:///path`).
+    hostname = iri_parts.hostname
+    if hostname:
+        # Punycode for Unicode hostnames. The 'idna' encoding produces ASCII text.
+        net_location += hostname.encode('idna').decode('utf-8')
+
+    # Port 80 is only the default (and thus omittable) for plain HTTP; dropping
+    # it for any other scheme would make the URI point to a different port.
+    port = iri_parts.port
+    if port is not None and not (iri_parts.scheme == 'http' and port == 80):
+        net_location += ':' + str(port)
+
+    return compat_urllib_parse_urlunparse((
+        iri_parts.scheme,
+        net_location,
+
+        # Outside of the query, '+' is a literal plus sign, so spaces have to
+        # become `%20` here (quote) rather than '+' (quote_plus).
+        compat_urllib_parse_quote(iri_parts.path, safe=path_safe),
+
+        # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
+        compat_urllib_parse_quote(iri_parts.params, safe=path_safe),
+
+        # Not totally sure about the `safe` argument, since the source does not
+        # explicitly mention the query URI component.
+        compat_urllib_parse_quote_plus(iri_parts.query, safe=query_safe),
+
+        compat_urllib_parse_quote(iri_parts.fragment, safe=fragment_safe)))
+
+
+def to_high_limit_path(path):
+    """
+    Return `path` in a form that works around the MAX_PATH limitation on Windows.
+
+    On the 'win32' and 'cygwin' platforms the path is made absolute and gets
+    the extended-length prefix prepended; on every other platform it is
+    returned unchanged.
+    """
+    if sys.platform not in ('win32', 'cygwin'):
+        return path
+
+    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
+    # A raw string literal cannot end in a backslash, hence the padding space that is stripped right away.
+    extended_length_prefix = r'\\?\ '.rstrip()
+    return extended_length_prefix + os.path.abspath(path)