]>
Commit | Line | Data |
---|---|---|
fd647775 SS |
1 | #!/usr/bin/env python3 |
2 | ||
3 | """ | |
4 | Simple parser for spec compliant toml files | |
5 | ||
6 | A simple toml parser for files that comply with the spec. | |
7 | Should only be used to parse `pyproject.toml` for `install_deps.py`. | |
8 | ||
9 | IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED! | |
10 | """ | |
11 | ||
12 | from __future__ import annotations | |
13 | ||
14 | import datetime | |
15 | import json | |
16 | import re | |
17 | ||
# Optional run of horizontal whitespace (spaces and tabs); never crosses a newline.
WS = r'(?:[\ \t]*)'
# A single-line string: double-quoted (backslash escapes allowed) or single-quoted.
STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'')
# One key segment: a quoted string or a bare run of letters, digits, `_` and `-`.
SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+')
# A possibly dotted key: segments joined by `.`, each with optional whitespace around it.
KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*')
# The `=` separating key and value, plus any whitespace that follows it.
EQUALS_RE = re.compile(rf'={WS}')
# Compiled form of WS; used as the separator whitespace inside inline tables.
WS_RE = re.compile(WS)

# A table header at the very start of a line: `[path]`, or `[[path]]` (which sets `is_list`).
_SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)'
# Start of the next top-level expression: either a table header or a `key =` prefix.
EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE)

# Separator whitespace inside arrays; unlike WS it may span newlines and `#` comments.
LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*')
# A bare (unquoted) value: everything up to the next `,`, `}`, `]`, tab, newline or `#`.
LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+')
30 | ||
31 | ||
def parse_key(value: str):
    """Yield every segment of a (possibly dotted) key with its quoting removed.

    Double-quoted segments are decoded with `json.loads`, single-quoted
    segments simply lose their surrounding quotes, and bare segments are
    yielded unchanged.
    """
    for found in SINGLE_KEY_RE.finditer(value):
        segment = found.group()
        opener = segment[0]
        if opener == '"':
            segment = json.loads(segment)
        elif opener == '\'':
            segment = segment[1:-1]
        yield segment
40 | ||
41 | ||
def get_target(root: dict, paths: list[str], is_list=False):
    """Resolve `paths` below `root`, creating missing levels, and return the final table.

    Missing keys are created as empty dicts. When `is_list` is true, the last
    key is treated as an array of tables: a missing key is created as a list,
    and a fresh dict is appended to that list and returned. Whenever a key on
    the way holds a list (and is not the list target itself), the walk
    continues from that list's last element.

    The returned object is asserted to be a dict.
    """
    depth = len(paths)
    current = root

    for position, name in enumerate(paths, 1):
        wants_list = is_list and position == depth

        node = current.get(name)
        if node is None:
            node = current[name] = [] if wants_list else {}

        if isinstance(node, dict):
            current = node
            continue

        if not wants_list:
            # Existing array of tables: descend into its most recent entry.
            current = node[-1]
            continue

        # Array-of-tables header: start a new entry and make it the target.
        current = {}
        node.append(current)

    assert isinstance(current, dict)
    return current
62 | ||
63 | ||
def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern):
    """Drive the parsing of a delimited, comma-separated sequence.

    `index` points at the opening delimiter. For every item the generator
    yields `(True, start)`, where `start` is the index at which the item
    begins; the caller must parse the item and `send()` back the index just
    past it. A comma after an item is optional and is skipped together with
    surrounding whitespace (as defined by `ws_re`). Once the `end` character
    is reached the generator yields `(False, index_after_end)`.
    """
    def skip_ws(position: int) -> int:
        # Advance past whatever `ws_re` matches at `position`, if anything.
        found = ws_re.match(data, position)
        return found.end() if found else position

    # Step over the opening delimiter and any leading whitespace.
    index = skip_ws(index + 1)

    while data[index] != end:
        # Hand control to the caller; it answers with the index after the item.
        index = yield True, index
        index = skip_ws(index)

        if data[index] == ',':
            index = skip_ws(index + 1)

    assert data[index] == end
    yield False, index + 1
84 | ||
85 | ||
def parse_value(data: str, index: int):
    """Parse the TOML value that starts at `data[index]`.

    Supports arrays, inline tables, single-line strings and bare values
    (integers including `0x`/`0o`/`0b` prefixed ones, floats, times, dates,
    datetimes and booleans).

    Returns a tuple `(end, value)` where `end` is the index just past the
    consumed text. A bare value that matches none of the known types is
    returned as `None`.
    """
    if data[index] == '[':
        result = []

        # `parse_enclosed` yields the start of each item and expects the
        # index after the parsed item to be sent back.
        indices = parse_enclosed(data, index, ']', LIST_WS_RE)
        valid, index = next(indices)
        while valid:
            index, value = parse_value(data, index)
            result.append(value)
            valid, index = indices.send(index)

        return index, result

    if data[index] == '{':
        result = {}

        # Inline tables hold `key = value` pairs, stored directly into `result`.
        indices = parse_enclosed(data, index, '}', WS_RE)
        valid, index = next(indices)
        while valid:
            valid, index = indices.send(parse_kv_pair(data, index, result))

        return index, result

    if match := STRING_RE.match(data, index):
        # Double-quoted strings are decoded via JSON; single-quoted ones are taken literally.
        return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1]

    match = LEFTOVER_VALUE_RE.match(data, index)
    assert match
    value = match[0].strip()
    for func in [
        int,
        # Base 0 makes `int` honour the `0x`, `0o` and `0b` prefixes, which
        # plain `int()` rejects; without this such integers ended up as None.
        lambda text: int(text, 0),
        float,
        datetime.time.fromisoformat,
        datetime.date.fromisoformat,
        datetime.datetime.fromisoformat,
        # `dict.get` never raises, so it always ends the chain: anything
        # that is not `true`/`false` at this point becomes None.
        {'true': True, 'false': False}.get,
    ]:
        try:
            value = func(value)
            break
        except Exception:
            # Deliberate best-effort: any failure just means "not this type".
            pass

    return match.end(), value
130 | ||
131 | ||
def parse_kv_pair(data: str, index: int, target: dict):
    """Parse one `key = value` pair starting at `index` and store it in `target`.

    Dotted keys create (or reuse) nested tables below `target` for every
    segment but the last, which receives the parsed value.

    Returns the index just past the parsed value, or `None` when no key is
    found at `index`.
    """
    key_match = KEY_RE.match(data, index)
    if key_match is None:
        return None

    *parents, leaf = parse_key(key_match[0])

    equals_match = EQUALS_RE.match(data, key_match.end())
    assert equals_match

    end, parsed = parse_value(data, equals_match.end())
    table = get_target(target, parents)
    table[leaf] = parsed
    return end
146 | ||
147 | ||
def parse_toml(data: str):
    """Parse the TOML document `data` and return it as nested dicts and lists.

    The text is scanned for table headers and `key = value` lines. A header
    switches the table that subsequent pairs are written to; every pair is
    stored in the currently selected table.
    """
    document = {}
    current = document
    position = 0

    while (found := EXPRESSION_RE.search(data, position)) is not None:
        if found['subtable']:
            # `[table]` or `[[array-of-tables]]` header: select a new target.
            position = found.end()
            segments = list(parse_key(found['path']))
            current = get_target(document, segments, bool(found['is_list']))
        else:
            # Plain pair: parse it from the start of the match into the current table.
            position = parse_kv_pair(data, found.start(), current)
            assert position is not None

    return document
168 | ||
169 | ||
def main():
    """Command-line entry point: parse a TOML file and print it as JSON.

    Takes the path of the TOML file as the single positional argument, reads
    it as UTF-8 and writes the parsed document to stdout as one JSON value.
    Date, time and datetime values are emitted as their `isoformat()` strings.
    """
    import argparse
    from pathlib import Path

    parser = argparse.ArgumentParser()
    parser.add_argument('infile', type=Path, help='The TOML file to read as input')
    args = parser.parse_args()

    data = args.infile.read_text(encoding='utf-8')

    def default(obj):
        if isinstance(obj, (datetime.date, datetime.time, datetime.datetime)):
            return obj.isoformat()
        # A `default` hook must raise for objects it cannot convert; falling
        # through would return None and silently serialize them as `null`.
        raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')

    print(json.dumps(parse_toml(data), default=default))
186 | ||
187 | ||
# Run the command-line interface only when executed as a script, not when imported.
if __name__ == '__main__':
    main()