]> jfr.im git - yt-dlp.git/commitdiff
Add --write-*-link by h-h-h-h
authorpukkandan <redacted>
Tue, 27 Oct 2020 10:37:21 +0000 (16:07 +0530)
committerpukkandan <redacted>
Sun, 13 Dec 2020 14:35:04 +0000 (20:05 +0530)
Authored-by: h-h-h-h
README.md
test/parameters.json
test/test_YoutubeDL.py
test/test_compat.py
test/test_utils.py
youtube_dlc/YoutubeDL.py
youtube_dlc/__init__.py
youtube_dlc/compat.py
youtube_dlc/options.py
youtube_dlc/utils.py

index 7fded6a33624d8ab23e34ee24f629c2dd164cc00..f46c65dff8973fa66abb77e6c2ac57f8a10c6b34 100644 (file)
--- a/README.md
+++ b/README.md
@@ -321,6 +321,15 @@ ## Thumbnail images:
     --list-thumbnails                Simulate and list all available thumbnail
                                      formats
 
+## Internet Shortcut Options:
+    --write-link                     Write an internet shortcut file, depending on 
+                                     the current platform (.url/.webloc/.desktop). 
+                                     The URL may be cached by the OS.
+    --write-url-link                 Write a Windows internet shortcut file (.url). 
+                                     Note that the OS caches the URL based on the file path.
+    --write-webloc-link              Write a macOS internet shortcut file (.webloc)
+    --write-desktop-link             Write a Linux internet shortcut file (.desktop)
+
 ## Verbosity / Simulation Options:
     -q, --quiet                      Activate quiet mode
     --no-warnings                    Ignore warnings
index 65fd5442860f8352d83408fdeb1b69eb88ef31f8..76c2a9ae77cbfa5e87360e75382f9537b2af13b6 100644 (file)
     "verbose": true, 
     "writedescription": false, 
     "writeinfojson": true, 
+    "writeannotations": false,
+    "writelink": false,
+    "writeurllink": false,
+    "writewebloclink": false,
+    "writedesktoplink": false,
     "writesubtitles": false,
     "allsubtitles": false,
     "listsubtitles": false,
index a9e6491917e7efa86dfb8b2f6f385687d9c30a8f..5950dbffcad6df637450e1a37f1877de7be238b6 100644 (file)
@@ -42,6 +42,7 @@ def _make_result(formats, **kwargs):
         'title': 'testttitle',
         'extractor': 'testex',
         'extractor_key': 'TestEx',
+        'webpage_url': 'http://example.com/watch?v=shenanigans',
     }
     res.update(**kwargs)
     return res
@@ -567,6 +568,7 @@ def s_formats(lang, autocaption=False):
             'subtitles': subtitles,
             'automatic_captions': auto_captions,
             'extractor': 'TEST',
+            'webpage_url': 'http://example.com/watch?v=shenanigans',
         }
 
         def get_info(params={}):
@@ -730,6 +732,7 @@ def _match_entry(self, info_dict, incomplete):
             'playlist_id': '42',
             'uploader': "變態妍字幕版 太妍 тест",
             'creator': "тест ' 123 ' тест--",
+            'webpage_url': 'http://example.com/watch?v=shenanigans',
         }
         second = {
             'id': '2',
@@ -741,6 +744,7 @@ def _match_entry(self, info_dict, incomplete):
             'filesize': 5 * 1024,
             'playlist_id': '43',
             'uploader': "тест 123",
+            'webpage_url': 'http://example.com/watch?v=SHENANIGANS',
         }
         videos = [first, second]
 
index 8c49a001e5eabe6d058af188e1b566f7e3d58474..f66739bd422f398cd383705474d3d6295b942c8a 100644 (file)
@@ -19,6 +19,8 @@
     compat_shlex_split,
     compat_str,
     compat_struct_unpack,
+    compat_urllib_parse_quote,
+    compat_urllib_parse_quote_plus,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_urlencode,
@@ -53,6 +55,27 @@ def test_all_present(self):
             dir(youtube_dlc.compat))) - set(['unicode_literals'])
         self.assertEqual(all_names, sorted(present_names))
 
+    def test_compat_urllib_parse_quote(self):
+        # '~' only appears together with an explicit `safe` argument: Python 3.7+
+        # treats it as unreserved and never escapes it, whereas older versions
+        # escape it by default, so a default-`safe` expectation containing '~'
+        # would depend on the interpreter version.
+        self.assertEqual(compat_urllib_parse_quote('abc def'), 'abc%20def')
+        self.assertEqual(compat_urllib_parse_quote('/user/abc+def'), '/user/abc%2Bdef')
+        self.assertEqual(compat_urllib_parse_quote('/~user/abc+def', safe='/~+'), '/~user/abc+def')
+        self.assertEqual(compat_urllib_parse_quote(''), '')
+        self.assertEqual(compat_urllib_parse_quote('%'), '%25')
+        self.assertEqual(compat_urllib_parse_quote('%', safe='%'), '%')
+        self.assertEqual(compat_urllib_parse_quote('津波'), '%E6%B4%A5%E6%B3%A2')
+        # Multi-line input: everything listed in `safe` (including CR/LF) must survive untouched.
+        self.assertEqual(
+            compat_urllib_parse_quote('''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
+%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''', safe='<>=":%/ \r\n'),
+            '''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
+%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a''')
+        # With '%' marked safe, existing (and even malformed) percent sequences are left as is.
+        self.assertEqual(
+            compat_urllib_parse_quote('''(^◣_◢^)っ︻デ═一    ⇀    ⇀    ⇀    ⇀    ⇀    ↶%I%Break%25Things%''', safe='% '),
+            '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%86%B6%I%Break%25Things%''')
+
+    def test_compat_urllib_parse_quote_plus(self):
+        # '~' is only used with an explicit `safe` argument, because its default
+        # treatment differs between Python versions (unreserved since 3.7).
+        self.assertEqual(compat_urllib_parse_quote_plus('abc def'), 'abc+def')
+        self.assertEqual(compat_urllib_parse_quote_plus('/abc def'), '%2Fabc+def')
+        self.assertEqual(compat_urllib_parse_quote_plus('~/abc def', safe='~/'), '~/abc+def')
+
     def test_compat_urllib_parse_unquote(self):
         self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
         self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
index 16ad40831744780e697c5f1881b9f858ca559580..6562d443af8ea8b4e9177c7305a4a67b580382bc 100644 (file)
     cli_valueless_option,
     cli_bool_option,
     parse_codecs,
+    iri_to_uri,
 )
 from youtube_dlc.compat import (
     compat_chr,
@@ -1465,6 +1466,32 @@ def test_get_elements_by_attribute(self):
         self.assertEqual(get_elements_by_attribute('class', 'foo', html), [])
         self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), [])
 
+    def test_iri_to_uri(self):
+        # Pairs of (input IRI, expected ASCII-only URI).
+        expectations = (
+            # Same
+            ('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b',
+             'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'),
+            # German for cheese sauce stirring spoon
+            ('https://www.google.com/search?q=Käsesoßenrührlöffel',
+             'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel'),
+            # Already percent-encoded sequences must not be escaped a second time
+            ('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#',
+             'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'),
+            # Unicode host names are converted to Punycode
+            ('http://правозащита38.рф/category/news/',
+             'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/'),
+            ('http://www.правозащита38.рф/category/news/',
+             'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/'),
+            ('https://i❤.ws/emojidomain/👍👏🤝💪',
+             'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA'),
+            ('http://日本語.jp/',
+             'http://xn--wgv71a119e.jp/'),
+            ('http://导航.中国/',
+             'http://xn--fet810g.xn--fiqs8s/'),
+        )
+        for iri, expected_uri in expectations:
+            self.assertEqual(iri_to_uri(iri), expected_uri)
+
+
 
 if __name__ == '__main__':
     unittest.main()
index ee6d749107581230019277149d5fe5b7d9e5bee3..97e4f451f1e74bc8b7b1f9c95023adf5dc6e945a 100644 (file)
@@ -51,6 +51,9 @@
     DEFAULT_OUTTMPL,
     determine_ext,
     determine_protocol,
+    DOT_DESKTOP_LINK_TEMPLATE,
+    DOT_URL_LINK_TEMPLATE,
+    DOT_WEBLOC_LINK_TEMPLATE,
     DownloadError,
     encode_compat_str,
     encodeFilename,
@@ -61,6 +64,7 @@
     formatSeconds,
     GeoRestrictedError,
     int_or_none,
+    iri_to_uri,
     ISO3166Utils,
     locked_file,
     make_HTTPS_handler,
@@ -84,6 +88,7 @@
     std_headers,
     str_or_none,
     subtitles_filename,
+    to_high_limit_path,
     UnavailableVideoError,
     url_basename,
     version_tuple,
@@ -187,6 +192,11 @@ class YoutubeDL(object):
     writeannotations:  Write the video annotations to a .annotations.xml file
     writethumbnail:    Write the thumbnail image to a file
     write_all_thumbnails:  Write all thumbnail formats to files
+    writelink:         Write an internet shortcut file, depending on the
+                       current platform (.url/.webloc/.desktop)
+    writeurllink:      Write a Windows internet shortcut file (.url)
+    writewebloclink:   Write a macOS internet shortcut file (.webloc)
+    writedesktoplink:  Write a Linux internet shortcut file (.desktop)
     writesubtitles:    Write the video subtitles to a file
     writeautomaticsub: Write the automatically generated subtitles to a file
     allsubtitles:      Downloads all the subtitles of the video
@@ -1984,6 +1994,57 @@ def dl(name, info, subtitle=False):
 
         self._write_thumbnails(info_dict, filename)
 
+        # Write internet shortcut files
+        url_link = webloc_link = desktop_link = False
+        if self.params.get('writelink', False):
+            if sys.platform == "darwin":  # macOS.
+                webloc_link = True
+            elif sys.platform.startswith("linux"):
+                desktop_link = True
+            else:  # if sys.platform in ['win32', 'cygwin']:
+                url_link = True
+        if self.params.get('writeurllink', False):
+            url_link = True
+        if self.params.get('writewebloclink', False):
+            webloc_link = True
+        if self.params.get('writedesktoplink', False):
+            desktop_link = True
+
+        if url_link or webloc_link or desktop_link:
+            if 'webpage_url' not in info_dict:
+                self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
+                return
+            ascii_url = iri_to_uri(info_dict['webpage_url'])
+
+        def _write_link_file(extension, template, newline, embed_filename):
+            """
+            Write one internet shortcut file for the media file being processed.
+
+            extension      -- extension of the shortcut file, without the leading dot
+            template       -- %-style template with a 'url' field (and a 'filename'
+                              field when embed_filename is set)
+            newline        -- line terminator passed on to io.open()
+            embed_filename -- also hand the shortcut's file name, minus its
+                              extension, to the template
+
+            Returns False if the file could not be written (the error has been
+            reported by then), True otherwise, including when an existing file
+            is kept because of 'nooverwrites'.
+            """
+            linkfn = replace_extension(filename, extension, info_dict.get('ext'))
+            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
+                self.to_screen('[info] Internet shortcut is already present')
+            else:
+                link_url = ascii_url
+                if extension == 'webloc':
+                    # A .webloc file is an XML property list. The URL may legitimately
+                    # contain '&' (iri_to_uri() leaves it untouched), which has to be
+                    # written as '&amp;' or the plist is not well-formed.
+                    from xml.sax.saxutils import escape as xml_escape
+                    link_url = xml_escape(link_url)
+                try:
+                    self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
+                    with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
+                        template_vars = {'url': link_url}
+                        if embed_filename:
+                            # Strip '.<extension>' from the end of the shortcut's file name.
+                            template_vars['filename'] = linkfn[:-(len(extension) + 1)]
+                        linkfile.write(template % template_vars)
+                except (OSError, IOError):
+                    self.report_error('Cannot write internet shortcut ' + linkfn)
+                    return False
+            return True
+
+        if url_link:
+            if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
+                return
+        if webloc_link:
+            if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
+                return
+        if desktop_link:
+            if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
+                return
+
+        # Download
+        must_record_download_archive = False
         if not self.params.get('skip_download', False):
             try:
                 if info_dict.get('requested_formats') is not None:
index df07016e169154e733e5e8a5cd69d1fd1744cf16..d183016b6ea300599f183c28373d2bd671ac4273 100644 (file)
@@ -389,6 +389,10 @@ def parse_retries(retries):
         'writeinfojson': opts.writeinfojson,
         'writethumbnail': opts.writethumbnail,
         'write_all_thumbnails': opts.write_all_thumbnails,
+        'writelink': opts.writelink,
+        'writeurllink': opts.writeurllink,
+        'writewebloclink': opts.writewebloclink,
+        'writedesktoplink': opts.writedesktoplink,
         'writesubtitles': opts.writesubtitles,
         'writeautomaticsub': opts.writeautomaticsub,
         'allsubtitles': opts.allsubtitles,
index ac889ddd7a72cca7a4e8e0c07f74d992690d06ed..4a69b098fb460ec8fe36d9c0b53e09065ab00a78 100644 (file)
 except ImportError:  # Python 2
     import urllib as compat_urllib_parse
 
+try:
+    import urllib.parse as compat_urlparse
+except ImportError:  # Python 2
+    import urlparse as compat_urlparse
+
 try:
     from urllib.parse import urlparse as compat_urllib_parse_urlparse
 except ImportError:  # Python 2
     from urlparse import urlparse as compat_urllib_parse_urlparse
 
 try:
-    import urllib.parse as compat_urlparse
+    from urllib.parse import urlunparse as compat_urllib_parse_urlunparse
 except ImportError:  # Python 2
-    import urlparse as compat_urlparse
+    from urlparse import urlunparse as compat_urllib_parse_urlunparse
 
 try:
     import urllib.response as compat_urllib_response
@@ -2365,6 +2370,20 @@ class compat_HTMLParseError(Exception):
 except NameError:
     compat_str = str
 
+try:
+    from urllib.parse import (
+        quote as compat_urllib_parse_quote,
+        quote_plus as compat_urllib_parse_quote_plus,
+    )
+except ImportError:  # Python 2
+    # Fallbacks built on the Python 2 `urllib` module: the text is handed over
+    # as UTF-8 encoded bytes and `safe` as a native `str`, so non-ASCII
+    # characters end up percent-encoded byte by byte.
+    def compat_urllib_parse_quote(string, safe='/'):
+        utf8_bytes = string.encode('utf-8')
+        return compat_urllib_parse.quote(utf8_bytes, str(safe))
+
+    def compat_urllib_parse_quote_plus(string, safe=''):
+        utf8_bytes = string.encode('utf-8')
+        return compat_urllib_parse.quote_plus(utf8_bytes, str(safe))
+
 try:
     from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
     from urllib.parse import unquote as compat_urllib_parse_unquote
@@ -3033,11 +3052,14 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
     'compat_tokenize_tokenize',
     'compat_urllib_error',
     'compat_urllib_parse',
+    'compat_urllib_parse_quote',
+    'compat_urllib_parse_quote_plus',
     'compat_urllib_parse_unquote',
     'compat_urllib_parse_unquote_plus',
     'compat_urllib_parse_unquote_to_bytes',
     'compat_urllib_parse_urlencode',
     'compat_urllib_parse_urlparse',
+    'compat_urllib_parse_urlunparse',
     'compat_urllib_request',
     'compat_urllib_request_DataHandler',
     'compat_urllib_response',
index 44eba3e9c7359cde7f36fcbe01f39183d214186f..bd85abd3a850d229c4e008815c87a66db5a9f589 100644 (file)
@@ -830,7 +830,25 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
         action='store_true', dest='list_thumbnails', default=False,
         help='Simulate and list all available thumbnail formats')
 
-    postproc = optparse.OptionGroup(parser, 'Post-processing Options')
+    link = optparse.OptionGroup(parser, 'Internet Shortcut Options')
+    link.add_option(
+        '--write-link',
+        action='store_true', dest='writelink', default=False,
+        help='Write an internet shortcut file, depending on the current platform (.url/.webloc/.desktop). The URL may be cached by the OS.')
+    link.add_option(
+        '--write-url-link',
+        action='store_true', dest='writeurllink', default=False,
+        help='Write a Windows internet shortcut file (.url). Note that the OS caches the URL based on the file path.')
+    link.add_option(
+        '--write-webloc-link',
+        action='store_true', dest='writewebloclink', default=False,
+        help='Write a macOS internet shortcut file (.webloc)')
+    link.add_option(
+        '--write-desktop-link',
+        action='store_true', dest='writedesktoplink', default=False,
+        help='Write a Linux internet shortcut file (.desktop)')
+
+    postproc = optparse.OptionGroup(parser, 'Post-Processing Options')
     postproc.add_option(
         '-x', '--extract-audio',
         action='store_true', dest='extractaudio', default=False,
@@ -932,6 +950,7 @@ def _comma_separated_values_options_callback(option, opt_str, value, parser):
     parser.add_option_group(downloader)
     parser.add_option_group(filesystem)
     parser.add_option_group(thumbnail)
+    parser.add_option_group(link)
     parser.add_option_group(verbosity)
     parser.add_option_group(workarounds)
     parser.add_option_group(video_format)
index 68b4ca944f622711c19ae6a2ff7ea0cfe1c177ea..d814eb2ac8b5e0ed56976ffda1e2c7f0b3453333 100644 (file)
@@ -60,6 +60,9 @@
     compat_urllib_parse,
     compat_urllib_parse_urlencode,
     compat_urllib_parse_urlparse,
+    compat_urllib_parse_urlunparse,
+    compat_urllib_parse_quote,
+    compat_urllib_parse_quote_plus,
     compat_urllib_parse_unquote_plus,
     compat_urllib_request,
     compat_urlparse,
@@ -5714,3 +5717,81 @@ def random_birthday(year_field, month_field, day_field):
         month_field: str(random_date.month),
         day_field: str(random_date.day),
     }
+
+# Templates for internet shortcut files, which are plain text files.
+# Each one is filled in with the `%` operator and a dict providing 'url'
+# (the .desktop template additionally needs 'filename').
+
+# Windows internet shortcut (.url)
+DOT_URL_LINK_TEMPLATE = (
+    '[InternetShortcut]\n'
+    'URL=%(url)s\n'
+)
+
+# macOS internet shortcut (.webloc), an XML property list
+DOT_WEBLOC_LINK_TEMPLATE = (
+    '<?xml version="1.0" encoding="UTF-8"?>\n'
+    '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">\n'
+    '<plist version="1.0">\n'
+    '<dict>\n'
+    '\t<key>URL</key>\n'
+    '\t<string>%(url)s</string>\n'
+    '</dict>\n'
+    '</plist>\n'
+)
+
+# Linux desktop entry of type "Link" (.desktop)
+DOT_DESKTOP_LINK_TEMPLATE = (
+    '[Desktop Entry]\n'
+    'Encoding=UTF-8\n'
+    'Name=%(filename)s\n'
+    'Type=Link\n'
+    'URL=%(url)s\n'
+    'Icon=text-html\n'
+)
+
+
+def iri_to_uri(iri):
+    """
+    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
+
+    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
+
+    Unicode host names are converted to Punycode. An explicit port is kept, except for port 80 on `http` URLs, where it is the default anyway. IRIs without a host (e.g. `file:///...` or relative references) are accepted.
+
+    Raises ValueError for IPv6 (bracketed) hosts, which are not supported.
+    """
+
+    iri_parts = compat_urllib_parse_urlparse(iri)
+
+    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
+    if '[' in iri_parts.netloc:
+        raise ValueError('IPv6 URIs are not, yet, supported.')
+
+    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
+    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
+
+    net_location = ''
+    if iri_parts.username:
+        net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
+        if iri_parts.password is not None:
+            net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
+        net_location += '@'
+
+    # `hostname` is None when the IRI has no authority part (`file:///...`, relative references).
+    if iri_parts.hostname is not None:
+        net_location += iri_parts.hostname.encode('idna').decode('utf-8')  # Punycode for Unicode hostnames.
+        # The 'idna' encoding produces ASCII text.
+
+    # Port 80 is only redundant for plain HTTP; dropping it for any other scheme (e.g. `https`) would point the URI at a different endpoint.
+    port = iri_parts.port
+    if port is not None and not (iri_parts.scheme == 'http' and port == 80):
+        net_location += ':' + str(port)
+
+    return compat_urllib_parse_urlunparse(
+        (iri_parts.scheme,
+            net_location,
+
+            # Plain `quote` (space -> `%20`) for everything but the query: a `+` only stands for a space in form-encoded query strings and is a literal plus sign elsewhere.
+            compat_urllib_parse_quote(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
+
+            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
+            compat_urllib_parse_quote(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
+
+            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
+            compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
+
+            compat_urllib_parse_quote(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
+
+
+def to_high_limit_path(path):
+    """
+    Return `path` in a form that is not subject to the MAX_PATH limitation on Windows.
+
+    On 'win32' and 'cygwin' the path is made absolute and prefixed with `\\\\?\\`; on every other platform it is returned unchanged.
+    """
+    if sys.platform not in ['win32', 'cygwin']:
+        return path
+
+    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
+    extended_length_prefix = '\\\\?\\'
+    return extended_length_prefix + os.path.abspath(path)