]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | from functools import lru_cache | |
3 | from typing import Callable, List | |
4 | ||
5 | from ._cell_widths import CELL_WIDTHS | |
6 | ||
# Fast-path matcher: returns a match object only when every character of the
# string falls inside the listed code point ranges, which callers treat as
# "one cell per character" text.
# NOTE(review): the ASCII range ends at U+006F ("o"), so text containing
# "p".."z" never matches; U+00A0 and U+02FF are listed as two individual
# characters rather than a range; and "$" also matches just before a trailing
# newline. Confirm whether these are intended before changing the pattern.
_single_cell_pattern = re.compile("^[\u0020-\u006f\u00a0\u02ff\u0370-\u0482]*$")
_is_single_cell_widths = _single_cell_pattern.match
9 | ||
10 | ||
@lru_cache(maxsize=4096)
def cached_cell_len(text: str) -> int:
    """Return how many terminal cells are needed to display ``text``.

    Results are memoised unconditionally (up to 4096 distinct strings), which
    can hold on to a lot of memory for long inputs. Prefer `cell_len`, which
    only uses this cache for short strings.

    Args:
        text (str): Text to display.

    Returns:
        int: Sum of the cell sizes of every character in ``text``.
    """
    return sum(map(get_character_cell_size, text))
27 | ||
28 | ||
def cell_len(text: str, _cell_len: Callable[[str], int] = cached_cell_len) -> int:
    """Return how many terminal cells are needed to display ``text``.

    Args:
        text (str): Text to display.
        _cell_len (Callable[[str], int]): Function used to measure strings
            shorter than 512 characters. Defaults to `cached_cell_len`.

    Returns:
        int: Number of cells required to display ``text``.
    """
    if len(text) >= 512:
        # Long strings are measured directly so they never enter the cache.
        return sum(map(get_character_cell_size, text))
    return _cell_len(text)
43 | ||
44 | ||
@lru_cache(maxsize=4096)
def get_character_cell_size(character: str) -> int:
    """Look up the cell size of one character.

    Args:
        character (str): A single character; it is passed to ``ord``, which
            rejects strings of any other length.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character.
    """
    codepoint = ord(character)
    return _get_codepoint_cell_size(codepoint)
56 | ||
57 | ||
@lru_cache(maxsize=4096)
def _get_codepoint_cell_size(codepoint: int) -> int:
    """Look up the cell size for a code point in ``CELL_WIDTHS``.

    The table is searched by bisection over its ``(start, end, width)``
    entries; this presumes the entries are sorted by code point and do not
    overlap (not verifiable from this module).

    Args:
        codepoint (int): Codepoint of a character.

    Returns:
        int: Number of cells (0, 1 or 2) occupied by that character. A table
            width of -1 is reported as 0, and a code point that is in no
            range is reported as 1.
    """
    table = CELL_WIDTHS
    low, high = 0, len(table) - 1
    while True:
        mid = (low + high) // 2
        range_start, range_end, cell_width = table[mid]
        if range_start <= codepoint <= range_end:
            # -1 entries in the table are treated as zero-width.
            return cell_width if cell_width != -1 else 0
        if codepoint < range_start:
            high = mid - 1
        else:
            low = mid + 1
        if high < low:
            # Search window is exhausted: not in the table, assume one cell.
            return 1
85 | ||
86 | ||
def set_cell_size(text: str, total: int) -> str:
    """Set the length of a string to fit within given number of cells.

    Text that is too short is padded on the right with spaces; text that is
    too long is cropped. If the crop point falls in the middle of a
    double-width character, that character is replaced by a single space so
    the result still occupies exactly ``total`` cells.

    Args:
        text (str): Text to resize.
        total (int): Desired width in cells. Zero or negative values produce
            an empty string.

    Returns:
        str: ``text`` padded or cropped to ``total`` cells.
    """
    # This guard must come before the fast path: with a negative ``total`` the
    # slice ``text[:total]`` would drop characters from the end instead of
    # yielding an empty string.
    if total <= 0:
        return ""

    if _is_single_cell_widths(text):
        # Every character is one cell wide, so len() is the cell width.
        size = len(text)
        if size < total:
            return text + " " * (total - size)
        return text[:total]

    cell_size = cell_len(text)
    if cell_size == total:
        return text
    if cell_size < total:
        return text + " " * (total - cell_size)

    start = 0
    end = len(text)

    # Binary search until we find the right size
    while True:
        pos = (start + end) // 2
        before = text[: pos + 1]
        before_len = cell_len(before)
        if before_len == total + 1 and cell_len(before[-1]) == 2:
            # The last character straddles the boundary: swap it for a space.
            return before[:-1] + " "
        if before_len == total:
            return before
        if before_len > total:
            end = pos
        else:
            start = pos
120 | ||
121 | ||
122 | # TODO: This is inefficient | |
123 | # TODO: This might not work with CWJ type characters | |
def chop_cells(text: str, max_size: int, position: int = 0) -> List[str]:
    """Split ``text`` into chunks of at most ``max_size`` cells, walking it backwards.

    Characters are consumed from the end of ``text`` towards its start, so
    each returned chunk holds its characters in reverse order and the chunks
    are listed from the tail of ``text`` to its head. For example, if every
    character is one cell wide, ``chop_cells("abcd", 2)`` gives
    ``["dc", "ba"]``.

    Args:
        text (str): Text to break up.
        max_size (int): Maximum number of cells per chunk.
        position (int): Cells treated as already used in the first chunk.

    Returns:
        List[str]: The chunks, always containing at least one (possibly
            empty) string.
    """
    lines: List[List[str]] = [[]]
    used_cells = position

    for character in reversed(text):
        width = get_character_cell_size(character)
        if used_cells + width > max_size:
            # Character does not fit: it opens a new chunk.
            lines.append([character])
            used_cells = width
        else:
            lines[-1].append(character)
            used_cells += width

    return ["".join(line) for line in lines]
145 | ||
146 | ||
if __name__ == "__main__":  # pragma: no cover
    # Manual demo: print an emoji's cell size, chop a double-width sample into
    # 8-cell chunks, then resize the sample to every width from 80 down to 2
    # with a ruler of "x" characters under each row for comparison.
    sample = """这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。"""

    print(get_character_cell_size("😽"))
    for line in chop_cells(sample, 8):
        print(line)
    for n in range(80, 1, -1):
        print(set_cell_size(sample, n) + "|")
        print("x" * n)