[docs] Improve manpage format (#2003)

[yt-dlp.git] / devscripts / prepare_manpage.py
diff --git a/devscripts/prepare_manpage.py b/devscripts/prepare_manpage.py

index 485b39e9f308c7a7e19a3ab9f93ecc41a89ff7ba..b4446a36880ecc7b24a149f42ce381826e7d52f2 100644 (file)
--- a/devscripts/prepare_manpage.py
+++ b/devscripts/prepare_manpage.py
@@ -13,12 +13,14 @@
  
  # NAME
  
-youtube\-dl \- download videos from youtube.com or other video platforms
+yt\-dlp \- A youtube-dl fork with additional features and patches
  
  # SYNOPSIS
  
  **yt-dlp** \[OPTIONS\] URL [URL...]
  
+# DESCRIPTION
+
  '''
  
  
@@ -33,47 +35,62 @@ def main():
      with io.open(README_FILE, encoding='utf-8') as f:
          readme = f.read()
  
-    readme = re.sub(r'(?s)^.*?(?=# DESCRIPTION)', '', readme)
-    readme = re.sub(r'\s+yt-dlp \[OPTIONS\] URL \[URL\.\.\.\]', '', readme)
-    readme = PREFIX + readme
-
+    readme = filter_excluded_sections(readme)
+    readme = move_sections(readme)
      readme = filter_options(readme)
  
      with io.open(outfile, 'w', encoding='utf-8') as outf:
-        outf.write(readme)
+        outf.write(PREFIX + readme)
+
+
+def filter_excluded_sections(readme):
+    EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->')
+    EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->')
+    return re.sub(
+        rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n',
+        '', readme)
+
+
+def move_sections(readme):
+    MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
+    sections = re.findall(rf'(?m)^{re.escape(MOVE_TAG_TEMPLATE) % "(.+)"}$', readme)
+
+    for section_name in sections:
+        move_tag = MOVE_TAG_TEMPLATE % section_name
+        if readme.count(move_tag) > 1:
+            raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected')
+
+        sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme)
+        if len(sections) < 1:
+            raise Exception(f'The section {section_name} does not exist')
+        elif len(sections) > 1:
+            raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled')
+
+        readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1)
+    return readme
  
  
  def filter_options(readme):
-    ret = ''
-    in_options = False
-    for line in readme.split('\n'):
-        if line.startswith('# '):
-            if line[2:].startswith('OPTIONS'):
-                in_options = True
-            else:
-                in_options = False
-
-        if in_options:
-            if line.lstrip().startswith('-'):
-                split = re.split(r'\s{2,}', line.lstrip())
-                # Description string may start with `-` as well. If there is
-                # only one piece then it's a description bit not an option.
-                if len(split) > 1:
-                    option, description = split
-                    split_option = option.split(' ')
-
-                    if not split_option[-1].startswith('-'):  # metavar
-                        option = ' '.join(split_option[:-1] + ['*%s*' % split_option[-1]])
-
-                    # Pandoc's definition_lists. See http://pandoc.org/README.html
-                    # for more information.
-                    ret += '\n%s\n:   %s\n' % (option, description)
-                    continue
-            ret += line.lstrip() + '\n'
-        else:
-            ret += line + '\n'
-
-    return ret
+    section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
+    options = '# OPTIONS\n'
+    for line in section.split('\n')[1:]:
+        if line.lstrip().startswith('-'):
+            split = re.split(r'\s{2,}', line.lstrip())
+            # Description string may start with `-` as well. If there is
+            # only one piece then it's a description bit not an option.
+            if len(split) > 1:
+                option, description = split
+                split_option = option.split(' ')
+
+                if not split_option[-1].startswith('-'):  # metavar
+                    option = ' '.join(split_option[:-1] + [f'*{split_option[-1]}*'])
+
+                # Pandoc's definition_lists. See http://pandoc.org/README.html
+                options += f'\n{option}\n:   {description}\n'
+                continue
+        options += line.lstrip() + '\n'
+
+    return readme.replace(section, options, 1)
  
  
  if __name__ == '__main__':