]> jfr.im git - yt-dlp.git/blame - devscripts/tomlparse.py
[cleanup] Fix misc bugs (#8968)
[yt-dlp.git] / devscripts / tomlparse.py
CommitLineData
fd647775
SS
1#!/usr/bin/env python3
2
3"""
4Simple parser for spec compliant toml files
5
6A simple toml parser for files that comply with the spec.
7Should only be used to parse `pyproject.toml` for `install_deps.py`.
8
9IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
10"""
11
12from __future__ import annotations
13
14import datetime
15import json
16import re
17
# Optional run of spaces and tabs on a single line (may match the empty string).
WS = r'(?:[\ \t]*)'
# A single-line string: double-quoted with backslash escapes, or single-quoted with no escapes.
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
# One key component: a quoted string, or a bare key made of ASCII letters, digits, `_` and `-`.
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
# A complete key: one or more components separated by dots, with optional whitespace around each.
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
# The `=` of a key/value pair together with any whitespace that follows it.
EQUALS_RE = re.compile(rf'={WS}')
# Compiled form of WS, used between the items of an inline table.
WS_RE = re.compile(WS)

# A table header at the start of a line: `[path]`, or `[[path]]` in which case
# the `is_list` group captures the second opening bracket.
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
# Start of the next top-level expression: a table header or `key =`.
# MULTILINE makes `^` match at the beginning of every line.
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)

# Filler allowed between array items: spaces/tabs, newlines and `#` comments running to end of line.
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
# A bare value: everything up to the next `,`, `}`, `]`, tab, newline or `#`.
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
30
31
def parse_key(value: str):
    """Yield the decoded components of a (possibly dotted) key, in order.

    Double-quoted components are decoded with `json.loads`, single-quoted
    components have their quotes removed verbatim, and bare components are
    yielded unchanged.
    """
    for token in (found[0] for found in SINGLE_KEY_RE.finditer(value)):
        if token.startswith('"'):
            # Basic string: let the JSON decoder resolve the escape sequences
            yield json.loads(token)
        elif token.startswith('\''):
            # Literal string: no escapes, just drop the surrounding quotes
            yield token[1:-1]
        else:
            yield token
40
41
def get_target(root: dict, paths: list[str], is_list=False):
    """Walk `paths` down from `root` and return the table found at the end.

    Missing components are created on the way: as empty dicts, or as a list
    for the final component when `is_list` is set. When the final component
    is such a list, a fresh dict is appended to it and returned. When an
    intermediate component is a list, the walk continues in its last element.
    With an empty `paths`, `root` itself is returned.
    """
    node = root

    for position, name in enumerate(paths, 1):
        # Only the last path component may denote an array of tables
        wants_list = is_list and position == len(paths)

        child = node.get(name)
        if child is None:
            child = node[name] = [] if wants_list else {}

        if isinstance(child, dict):
            node = child
            continue

        if wants_list:
            # Each array-of-tables header starts a new entry in the list
            node = {}
            child.append(node)
        else:
            # Keys below an array of tables belong to its most recent entry
            node = child[-1]

    assert isinstance(node, dict)
    return node
62
63
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Generator that steps through the items of a bracketed, comma-separated construct.

    `index` is the offset of the opening bracket. For every item it yields
    `(True, item_start)` and expects the offset just past the parsed item to
    be passed back through `send()`. Once the closing character `end` is
    reached it yields `(False, offset_after_end)`. `ws_re` matches the filler
    permitted around the items and separators.
    """
    def skip_filler(position):
        found = ws_re.match(data, position)
        return found.end() if found else position

    # Step over the opening bracket and any filler that follows it
    index = skip_filler(index + 1)

    while data[index] != end:
        # Hand the item start to the caller; it replies with the item's end offset
        index = skip_filler((yield True, index))

        if data[index] == ',':
            index = skip_filler(index + 1)

    assert data[index] == end
    yield False, index + 1
84
85
def parse_value(data: str, index: int):
    """Parse the value that starts at `data[index]`.

    Handles arrays, inline tables, single-line strings and bare values
    (integers including `0x`/`0o`/`0b` prefixed ones, floats, times, dates,
    datetimes and the booleans `true`/`false`).

    Returns a tuple `(end_index, value)` where `end_index` is the offset just
    past the parsed value. A bare value that none of the converters
    recognise is returned as `None`.
    """
    if data[index] == '[':
        result = []

        # parse_enclosed yields (True, item_start) for each item and expects
        # the offset just past the parsed item to be sent back
        indices = parse_enclosed(data, index, ']', LIST_WS_RE)
        valid, index = next(indices)
        while valid:
            index, value = parse_value(data, index)
            result.append(value)
            valid, index = indices.send(index)

        return index, result

    if data[index] == '{':
        result = {}

        indices = parse_enclosed(data, index, '}', WS_RE)
        valid, index = next(indices)
        while valid:
            # parse_kv_pair stores the pair in `result` and returns the end offset
            valid, index = indices.send(parse_kv_pair(data, index, result))

        return index, result

    if match := STRING_RE.match(data, index):
        text = match[0]
        # Double-quoted strings carry escapes; single-quoted ones are literal
        return match.end(), json.loads(text) if text[0] == '"' else text[1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()
    # Try the converters in order and keep the first one that accepts the text
    for func in [
        int,
        # Base 0 infers the radix from the prefix, which covers the
        # hexadecimal, octal and binary integer forms plain `int` rejects
        lambda text: int(text, 0),
        float,
        datetime.time.fromisoformat,
        datetime.date.fromisoformat,
        datetime.datetime.fromisoformat,
        # `dict.get` never raises, so anything reaching it that is not a
        # boolean literal becomes None
        {'true': True, 'false': False}.get,
    ]:
        try:
            value = func(value)
            break
        except ValueError:
            # Not this kind of value; fall through to the next converter
            pass

    return match.end(), value
130
131
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one `key = value` pair starting at `index` and store it in `target`.

    Dotted keys create (or reuse) the intermediate tables below `target`.
    Returns the offset just past the value, or `None` when no key is found
    at `index`.
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    # Everything before the last component names the tables to descend into
    *parents, leaf = parse_key(key_match[0])

    equals_match = EQUALS_RE.match(data, key_match.end())
    assert equals_match

    end, value = parse_value(data, equals_match.end())
    get_target(target, parents)[leaf] = value
    return end
146
147
def parse_toml(data: str):
    """Parse the TOML document `data` and return it as a nested dict.

    Table headers switch the table that subsequent key/value pairs are
    stored in; pairs that appear before any header go into the top level.
    """
    document = {}
    current = document
    cursor = 0

    while found := EXPRESSION_RE.search(data, cursor):
        if not found.group('subtable'):
            # A `key =` line: parse the whole pair into the active table
            cursor = parse_kv_pair(data, found.start(), current)
            assert cursor is not None
            continue

        # A `[table]` or `[[array of tables]]` header: switch the active table
        cursor = found.end()
        path, is_list = found.group('path', 'is_list')
        current = get_target(document, list(parse_key(path)), bool(is_list))

    return document
168
169
def main():
    """Parse the TOML file named on the command line and print it as JSON."""
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=Path, help='The TOML file to read as input')
    args = parser.parse_args()

    with args.infile.open('r', encoding='utf-8') as file:
        data = file.read()

    def default(obj):
        # Called by json.dumps for objects it cannot serialize on its own
        if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
            return obj.isoformat()
        # Falling through would return None and silently emit `null`;
        # the json `default` protocol expects a TypeError instead
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    print(json.dumps(parse_toml(data), default=default))
186
187
# Run the command-line interface only when executed as a script, not on import
if __name__ == '__main__':
    main()