]>
Commit | Line | Data |
---|---|---|
e0df8241 JR |
1 | import collections |
2 | import functools | |
3 | import os | |
4 | import re | |
5 | import struct | |
6 | import sys | |
7 | import warnings | |
8 | from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple | |
9 | ||
10 | ||
11 | # Python does not provide platform information at sufficient granularity to | |
12 | # identify the architecture of the running executable in some cases, so we | |
13 | # determine it dynamically by reading the information from the running | |
14 | # process. This only applies on Linux, which uses the ELF format. | |
class _ELFFileHeader:
    """In-memory view of the ELF file header read from a binary stream.

    The constructor consumes the header fields from ``file`` in on-disk
    order and stores each one as an attribute named after the ELF field
    (``e_ident_*``, ``e_type``, ``e_machine``, ...).

    Raises ``_ELFFileHeader._InvalidELFFileHeader`` when the magic number,
    class or data-encoding byte is not recognised, or when the stream ends
    before an integer field could be read in full.
    """

    # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header
    class _InvalidELFFileHeader(ValueError):
        """
        An invalid ELF file header was found.
        """

    ELF_MAGIC_NUMBER = 0x7F454C46
    ELFCLASS32 = 1
    ELFCLASS64 = 2
    ELFDATA2LSB = 1
    ELFDATA2MSB = 2
    EM_386 = 3
    EM_S390 = 22
    EM_ARM = 40
    EM_X86_64 = 62
    EF_ARM_ABIMASK = 0xFF000000
    EF_ARM_ABI_VER5 = 0x05000000
    EF_ARM_ABI_FLOAT_HARD = 0x00000400

    def __init__(self, file: IO[bytes]) -> None:
        def read_int(fmt: str) -> int:
            # Pull exactly as many bytes as ``fmt`` describes; a short read
            # makes struct.unpack fail, which is reported as an invalid
            # header rather than leaking struct.error to the caller.
            raw = file.read(struct.calcsize(fmt))
            try:
                (value,) = struct.unpack(fmt, raw)
            except struct.error:
                raise _ELFFileHeader._InvalidELFFileHeader()
            return value

        # --- e_ident: the magic is always compared as a big-endian word ---
        self.e_ident_magic = read_int(">I")
        if self.e_ident_magic != self.ELF_MAGIC_NUMBER:
            raise _ELFFileHeader._InvalidELFFileHeader()

        self.e_ident_class = read_int("B")
        if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}:
            raise _ELFFileHeader._InvalidELFFileHeader()

        self.e_ident_data = read_int("B")
        if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}:
            raise _ELFFileHeader._InvalidELFFileHeader()

        self.e_ident_version = read_int("B")
        self.e_ident_osabi = read_int("B")
        self.e_ident_abiversion = read_int("B")
        self.e_ident_pad = file.read(7)

        # --- remaining fields follow the byte order announced in e_ident ---
        byte_order = "<" if self.e_ident_data == self.ELFDATA2LSB else ">"
        half = byte_order + "H"
        word = byte_order + "I"
        # Address/offset fields are 4 bytes wide for ELFCLASS32, 8 otherwise.
        if self.e_ident_class == self.ELFCLASS32:
            addr = word
        else:
            addr = byte_order + "Q"

        self.e_type = read_int(half)
        self.e_machine = read_int(half)
        self.e_version = read_int(word)
        self.e_entry = read_int(addr)
        self.e_phoff = read_int(addr)
        self.e_shoff = read_int(addr)
        self.e_flags = read_int(word)
        self.e_ehsize = read_int(half)
        self.e_phentsize = read_int(half)
        self.e_phnum = read_int(half)
        self.e_shentsize = read_int(half)
        self.e_shnum = read_int(half)
        self.e_shstrndx = read_int(half)
74 | ||
75 | ||
def _get_elf_header() -> Optional[_ELFFileHeader]:
    """Parse the ELF header of the running interpreter binary.

    Returns the parsed header, or ``None`` when ``sys.executable`` cannot be
    opened (``OSError``/``TypeError``) or does not start with a valid ELF
    header.
    """
    try:
        with open(sys.executable, "rb") as executable:
            return _ELFFileHeader(executable)
    except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader):
        return None
83 | ||
84 | ||
def _is_linux_armhf() -> bool:
    """Report whether the running executable uses the ARM hard-float ABI.

    The hard-float ABI can be detected from the ELF header of the running
    process: a 32-bit, little-endian ARM binary whose flags carry EABI
    version 5 and the hard-float bit.
    https://static.docs.arm.com/ihi0044/g/aaelf32.pdf

    Returns ``False`` when no ELF header could be read.
    """
    header = _get_elf_header()
    if header is None:
        return False
    flags = header.e_flags
    return (
        header.e_ident_class == header.ELFCLASS32
        and header.e_ident_data == header.ELFDATA2LSB
        and header.e_machine == header.EM_ARM
        and (flags & header.EF_ARM_ABIMASK) == header.EF_ARM_ABI_VER5
        and (flags & header.EF_ARM_ABI_FLOAT_HARD) == header.EF_ARM_ABI_FLOAT_HARD
    )
102 | ||
103 | ||
def _is_linux_i686() -> bool:
    """Report whether the running executable is a 32-bit little-endian x86 ELF.

    Returns ``False`` when no ELF header could be read.
    """
    header = _get_elf_header()
    if header is None:
        return False
    return (
        header.e_ident_class == header.ELFCLASS32
        and header.e_ident_data == header.ELFDATA2LSB
        and header.e_machine == header.EM_386
    )
112 | ||
113 | ||
def _have_compatible_abi(arch: str) -> bool:
    """Decide whether ``arch`` has an ABI for which tags may be generated.

    ``armv7l`` and ``i686`` are confirmed by inspecting the running
    executable's ELF header; a fixed set of other architectures is accepted
    as-is; everything else is rejected.
    """
    if arch == "i686":
        return _is_linux_i686()
    if arch == "armv7l":
        return _is_linux_armhf()
    # Architectures accepted without looking at the ELF header.
    accepted_without_probe = {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"}
    return arch in accepted_without_probe
120 | ||
121 | ||
# If glibc ever changes its major version, we need to know what the last
# minor version was, so we can build the complete list of all versions.
# For now, guess what the highest minor version might be and assume it
# will be 50 for testing. Once this actually happens, update the dictionary
# with the actual value.
# The mapping is keyed by glibc major version; any major version without an
# explicit entry yields the assumed last minor version of 50.
_LAST_GLIBC_MINOR: Dict[int, int] = collections.defaultdict(lambda: 50)
128 | ||
129 | ||
class _GLibCVersion(NamedTuple):
    """A glibc version as a ``(major, minor)`` pair of integers."""

    major: int
    minor: int
133 | ||
134 | ||
def _glibc_version_string_confstr() -> Optional[str]:
    """
    Primary implementation of glibc_version_string using os.confstr.

    Returns the version part of a value such as "glibc 2.17" (i.e. "2.17"),
    or None when os.confstr / CS_GNU_LIBC_VERSION is unavailable or the
    value does not consist of exactly two whitespace-separated fields.
    """
    # os.confstr is quite a bit faster than ctypes.CDLL. It's also less
    # likely to be broken or missing. This strategy is used in the standard
    # library platform module.
    # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183
    try:
        # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17".
        raw_value = os.confstr("CS_GNU_LIBC_VERSION")
    except (AttributeError, OSError, ValueError):
        # os.confstr() or CS_GNU_LIBC_VERSION not available...
        return None
    if raw_value is None:
        return None
    fields = raw_value.split()
    if len(fields) != 2:
        # ...or a bad value.
        return None
    return fields[1]
152 | ||
153 | ||
def _glibc_version_string_ctypes() -> Optional[str]:
    """
    Fallback implementation of glibc_version_string using ctypes.

    Returns a string like "2.5", or None when ctypes is unavailable, the
    process namespace cannot be opened, or it exposes no
    gnu_get_libc_version symbol.
    """
    try:
        import ctypes
    except ImportError:
        return None

    # ctypes.CDLL(None) internally calls dlopen(NULL), and as the dlopen
    # manpage says, "If filename is NULL, then the returned handle is for the
    # main program". This way we can let the linker do the work to figure out
    # which libc our process is actually using.
    #
    # We must also handle the special case where the executable is not a
    # dynamically linked executable. This can occur when using musl libc,
    # for example. In this situation, dlopen() will error, leading to an
    # OSError. Interestingly, at least in the case of musl, there is no
    # errno set on the OSError. The single string argument used to construct
    # OSError comes from libc itself and is therefore not portable to
    # hard code here. In any case, failure to call dlopen() means we
    # cannot proceed, so we bail on our attempt.
    try:
        main_program = ctypes.CDLL(None)
    except OSError:
        return None

    # A missing symbol surfaces as AttributeError, which getattr() turns into
    # the default: no symbol -> we are not linked to glibc.
    version_fn = getattr(main_program, "gnu_get_libc_version", None)
    if version_fn is None:
        return None

    # Call gnu_get_libc_version, which returns a string like "2.5"
    version_fn.restype = ctypes.c_char_p
    raw_version = version_fn()
    # The C string arrives as bytes; decode unless it is already text.
    if isinstance(raw_version, str):
        return raw_version
    return raw_version.decode("ascii")
196 | ||
197 | ||
def _glibc_version_string() -> Optional[str]:
    """Returns glibc version string, or None if not using glibc."""
    # Prefer the confstr lookup; only fall back to ctypes when it yields
    # nothing usable.
    from_confstr = _glibc_version_string_confstr()
    if from_confstr:
        return from_confstr
    return _glibc_version_string_ctypes()
201 | ||
202 | ||
def _parse_glibc_version(version_str: str) -> Tuple[int, int]:
    """Parse glibc version.

    Only the leading ``major.minor`` digits are considered. A regexp is used
    rather than str.split so that trailing junk after the minor version is
    ignored -- patched/forked glibc builds may report strings such as
    "2.20-2014.11" (Linaro). See gh-3588.

    Returns ``(major, minor)``; when the string does not start with
    ``major.minor`` a RuntimeWarning is issued and ``(-1, -1)`` is returned.
    """
    parsed = re.match(r"(?P<major>[0-9]+)\.(?P<minor>[0-9]+)", version_str)
    if parsed is not None:
        return int(parsed.group("major")), int(parsed.group("minor"))
    warnings.warn(
        "Expected glibc version with 2 components major.minor,"
        " got: %s" % version_str,
        RuntimeWarning,
    )
    return -1, -1
220 | ||
221 | ||
@functools.lru_cache()
def _get_glibc_version() -> Tuple[int, int]:
    """Return the running glibc version as ``(major, minor)``.

    ``(-1, -1)`` is returned when no glibc version string is available. The
    result is cached after the first call.
    """
    raw_version = _glibc_version_string()
    return (-1, -1) if raw_version is None else _parse_glibc_version(raw_version)
228 | ||
229 | ||
230 | # From PEP 513, PEP 600 | |
def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool:
    """Decide whether a manylinux tag for glibc ``version`` is usable here.

    The tag is rejected when the system glibc is older than ``version``.
    Otherwise an optional ``_manylinux`` module may override the decision:
    a ``manylinux_compatible(major, minor, arch)`` function takes precedence
    (a ``None`` result means "compatible"), followed by the legacy
    ``manylinux1_compatible`` / ``manylinux2010_compatible`` /
    ``manylinux2014_compatible`` attributes for glibc 2.5 / 2.12 / 2.17.
    Without any applicable override the tag is compatible.

    ``name`` is accepted but not consulted by this function.
    """
    if _get_glibc_version() < version:
        return False
    # Check for presence of _manylinux module.
    try:
        import _manylinux  # noqa
    except ImportError:
        return True
    if hasattr(_manylinux, "manylinux_compatible"):
        verdict = _manylinux.manylinux_compatible(version[0], version[1], arch)
        return True if verdict is None else bool(verdict)
    # Legacy per-policy override attributes, keyed by the glibc version of
    # the policy they belong to.
    legacy_overrides = (
        (_GLibCVersion(2, 5), "manylinux1_compatible"),
        (_GLibCVersion(2, 12), "manylinux2010_compatible"),
        (_GLibCVersion(2, 17), "manylinux2014_compatible"),
    )
    for policy_version, attr_name in legacy_overrides:
        if version == policy_version and hasattr(_manylinux, attr_name):
            return bool(getattr(_manylinux, attr_name))
    return True
255 | ||
256 | ||
# Maps a glibc (major, minor) version to the legacy manylinux tag name that
# is emitted alongside the manylinux_<major>_<minor> tag for that version.
_LEGACY_MANYLINUX_MAP: Dict[Tuple[int, int], str] = {
    # CentOS 7 w/ glibc 2.17 (PEP 599)
    (2, 17): "manylinux2014",
    # CentOS 6 w/ glibc 2.12 (PEP 571)
    (2, 12): "manylinux2010",
    # CentOS 5 w/ glibc 2.5 (PEP 513)
    (2, 5): "manylinux1",
}
265 | ||
266 | ||
def platform_tags(linux: str, arch: str) -> Iterator[str]:
    """Yield manylinux platform tags derived from the ``linux`` tag.

    Tags are produced from the current glibc version downwards; each one is
    ``linux`` with the substring "linux" replaced by the manylinux tag name.
    For glibc 2.17, 2.12 and 2.5 the matching legacy tag (manylinux2014,
    manylinux2010, manylinux1) is yielded right after the
    ``manylinux_<major>_<minor>`` form. Nothing is yielded when ``arch`` has
    no compatible ABI.
    """
    if not _have_compatible_abi(arch):
        return
    # Exclusive lower bound for glibc 2.x: the oldest supported glibc is
    # (2, 5) on x86_64/i686 and (2, 17) on every other architecture.
    if arch in {"x86_64", "i686"}:
        floor = _GLibCVersion(2, 4)
    else:
        floor = _GLibCVersion(2, 16)
    newest = _GLibCVersion(*_get_glibc_version())
    # We can assume compatibility across glibc major versions.
    # https://sourceware.org/bugzilla/show_bug.cgi?id=24636
    #
    # Collect the highest version of every glibc major release from the
    # current one down to 2, so that the canonical list of all versions from
    # ``newest`` down to ``floor`` (including intermediary ones) can be
    # emitted.
    ceilings = [newest] + [
        _GLibCVersion(major, _LAST_GLIBC_MINOR[major])
        for major in range(newest.major - 1, 1, -1)
    ]
    for ceiling in ceilings:
        # For glibc major versions other than the floor's, the oldest
        # supported release is (x, 0), hence the exclusive bound of -1.
        stop_minor = floor.minor if ceiling.major == floor.major else -1
        for minor in range(ceiling.minor, stop_minor, -1):
            candidate = _GLibCVersion(ceiling.major, minor)
            modern_tag = "manylinux_{}_{}".format(*candidate)
            if _is_compatible(modern_tag, arch, candidate):
                yield linux.replace("linux", modern_tag)
            # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags.
            legacy_tag = _LEGACY_MANYLINUX_MAP.get(candidate)
            if legacy_tag is not None and _is_compatible(
                legacy_tag, arch, candidate
            ):
                yield linux.replace("linux", legacy_tag)