Page Menu · Home · FreeBSD

D41855.1777880039.diff
No One · Temporary

Size
38 KB
Referenced Files
None
Subscribers
None

D41855.1777880039.diff

diff --git a/lib/libc/amd64/amd64_archlevel.h b/lib/libc/amd64/amd64_archlevel.h
deleted file mode 100644
--- a/lib/libc/amd64/amd64_archlevel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*-
- * Copyright (c) 2023 The FreeBSD Foundation
- *
- * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
- * under sponsorship from the FreeBSD Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE
- */
-
-/* must be macros so they can be accessed from assembly */
-#define X86_64_SCALAR 0 /* disable SIMD optimisations */
-#define X86_64_BASELINE 1 /* CMOV, CX8, FPU, FXSR, MMX, OSFXSR, SSE, SSE2 */
-#define X86_64_V2 2 /* CMPXCHG16B, LAHF-SAHF, POPCNT, SSE3, SSSE3, SSE4_1, SSE4_2 */
-#define X86_64_V3 3 /* AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, OSXSAVE */
-#define X86_64_V4 4 /* AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL */
-
-#define X86_64_MAX X86_64_V4 /* highest supported architecture level */
-#define X86_64_UNDEFINED -1 /* architecture level not set yet */
-
-#ifndef __ASSEMBLER__
-#include <dlfcn.h>
-
-dlfunc_t __archlevel_resolve(u_int, u_int, u_int, u_int,
- int32_t[X86_64_MAX + 1]) __hidden;
-#else
-#include <machine/asm.h>
-
-#define ARCHRESOLVE(func) \
- .globl CNAME(func); \
- .type CNAME(func), @gnu_indirect_function; \
- .set CNAME(func), __CONCAT(func,_resolver); \
- ARCHENTRY(func, resolver); \
- lea __CONCAT(func,_funcs)(%rip), %r8; \
- jmp CNAME(__archlevel_resolve); \
- ARCHEND(func, resolver)
-
-/*
- * The func_funcs array stores the location of the implementations
- * as the distance from the func_funcs array to the function. Due
- * to compiling for the medium code model, a 32 bit integer suffices
- * to hold the distance.
- *
- * Doing it this way both saves storage and avoids giving rtld
- * relocations to process at load time.
- */
-#define ARCHFUNCS(func) \
- ARCHRESOLVE(func); \
- .section .rodata; \
- .align 4; \
- __CONCAT(func,_funcs):
-
-#define NOARCHFUNC \
- .4byte 0
-
-#define ARCHFUNC(func, level) \
- .4byte __CONCAT(__CONCAT(func,_),level) - __CONCAT(func,_funcs)
-
-#define ENDARCHFUNCS(func) \
- .zero 4*(X86_64_MAX+1)-(.-__CONCAT(func,_funcs)); \
- .size __CONCAT(func,_funcs), .-__CONCAT(func,_funcs)
-
-#define ARCHENTRY(func, level) \
- _START_ENTRY; \
- .type __CONCAT(__CONCAT(func,_),level), @function; \
- __CONCAT(__CONCAT(func,_),level):; \
- .cfi_startproc
-
-#define ARCHEND(func, level) \
- END(__CONCAT(__CONCAT(func,_),level))
-
-#endif /* __ASSEMBLER__ */
diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc
--- a/lib/libc/amd64/string/Makefile.inc
+++ b/lib/libc/amd64/string/Makefile.inc
@@ -1,14 +1,11 @@
MDSRCS+= \
- amd64_archlevel.c \
bcmp.S \
memcmp.S \
memcpy.S \
memmove.S \
memset.S \
- stpcpy.S \
strcat.S \
- strchrnul.S \
strcmp.S \
strlen.S \
- strcpy.c
+ stpcpy.S
diff --git a/lib/libc/amd64/string/amd64_archlevel.c b/lib/libc/amd64/string/amd64_archlevel.c
deleted file mode 100644
--- a/lib/libc/amd64/string/amd64_archlevel.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*-
- * Copyright (c) 2023 The FreeBSD Foundation
- *
- * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
- * under sponsorship from the FreeBSD Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE
- */
-
-#include <sys/types.h>
-
-#include <machine/atomic.h>
-#include <machine/cpufunc.h>
-#include <machine/specialreg.h>
-
-#include <stddef.h>
-#include <string.h>
-
-#include "amd64_archlevel.h"
-#include "libc_private.h"
-
-#define ARCHLEVEL_ENV "ARCHLEVEL"
-
-static volatile int amd64_archlevel = X86_64_UNDEFINED;
-
-static const struct archlevel {
- char name[10];
- /* CPUID feature bits that need to be present */
- u_int feat_edx, feat_ecx, amd_ecx, ext_ebx;
-} levels[] = {
- {
- .name = "scalar",
- .feat_edx = 0,
- .feat_ecx = 0,
- .amd_ecx = 0,
- .ext_ebx = 0,
- }, {
-#define FEAT_EDX_BASELINE (CPUID_FPU | CPUID_CX8 | CPUID_CMOV | CPUID_MMX | \
- CPUID_FXSR | CPUID_SSE | CPUID_SSE2)
- .name = "baseline",
- .feat_edx = FEAT_EDX_BASELINE,
- .feat_ecx = 0,
- .amd_ecx = 0,
- .ext_ebx = 0,
- }, {
-#define FEAT_ECX_V2 (CPUID2_SSE3 | CPUID2_SSSE3 | CPUID2_CX16 | CPUID2_SSE41 | \
- CPUID2_SSE42 | CPUID2_POPCNT)
-#define AMD_ECX_V2 AMDID2_LAHF
- .name = "x86-64-v2",
- .feat_edx = FEAT_EDX_BASELINE,
- .feat_ecx = FEAT_ECX_V2,
- .amd_ecx = AMD_ECX_V2,
- .ext_ebx = 0,
- }, {
-#define FEAT_ECX_V3 (FEAT_ECX_V2 | CPUID2_FMA | CPUID2_MOVBE | \
- CPUID2_OSXSAVE | CPUID2_AVX | CPUID2_F16C)
-#define AMD_ECX_V3 (AMD_ECX_V2 | AMDID2_ABM)
-#define EXT_EBX_V3 (CPUID_STDEXT_BMI1 | CPUID_STDEXT_AVX2 | CPUID_STDEXT_BMI2)
- .name = "x86-64-v3",
- .feat_edx = FEAT_EDX_BASELINE,
- .feat_ecx = FEAT_ECX_V3,
- .amd_ecx = AMD_ECX_V3,
- .ext_ebx = EXT_EBX_V3,
- }, {
-#define EXT_EBX_V4 (EXT_EBX_V3 | CPUID_STDEXT_AVX512F | \
- CPUID_STDEXT_AVX512DQ | CPUID_STDEXT_AVX512CD | \
- CPUID_STDEXT_AVX512BW | CPUID_STDEXT_AVX512VL)
- .name = "x86-64-v4",
- .feat_edx = FEAT_EDX_BASELINE,
- .feat_ecx = FEAT_ECX_V3,
- .amd_ecx = AMD_ECX_V3,
- .ext_ebx = EXT_EBX_V4,
- }
-};
-
-static int
-supported_archlevel(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, u_int ext_ecx)
-{
- int level;
- u_int p[4], max_leaf;
- u_int amd_ecx = 0;
-
- (void)ext_ecx;
-
- do_cpuid(0x80000000, p);
- max_leaf = p[0];
-
- if (max_leaf >= 0x80000001) {
- do_cpuid(0x80000001, p);
- amd_ecx = p[2];
- }
-
- for (level = X86_64_BASELINE; level <= X86_64_MAX; level++) {
- const struct archlevel *lvl = &levels[level];
-
- if ((lvl->feat_edx & feat_edx) != lvl->feat_edx ||
- (lvl->feat_ecx & feat_ecx) != lvl->feat_ecx ||
- (lvl->amd_ecx & amd_ecx) != lvl->amd_ecx ||
- (lvl->ext_ebx & ext_ebx) != lvl->ext_ebx)
- return (level - 1);
- }
-
- return (X86_64_MAX);
-}
-
-static int
-match_archlevel(const char *str, int *force)
-{
- int level, want_force = 0;
-
- *force = 0;
-
- if (str[0] == '!') {
- str++;
- want_force = 1;
- }
-
- for (level = 0; level <= X86_64_MAX; level++) {
- size_t i;
- const char *candidate = levels[level].name;
-
- /* can't use strcmp here: would recurse during ifunc resolution */
- for (i = 0; str[i] == candidate[i]; i++)
- /* suffixes starting with : or + are ignored for future extensions */
- if (str[i] == '\0' || str[i] == ':' || str[i] == '+') {
- if (want_force)
- *force = 1;
-
- return (level);
- }
- }
-
- return (X86_64_UNDEFINED);
-}
-
-/*
- * We can't use getenv(), strcmp(), and a bunch of other functions here as
- * they may in turn call SIMD-optimised string functions.
- *
- * *force is set to 1 if the architecture level is valid and begins with a !
- * and to 0 otherwise.
- */
-static int
-env_archlevel(int *force)
-{
- size_t i;
-
- if (environ == NULL)
- return (X86_64_UNDEFINED);
-
- for (i = 0; environ[i] != NULL; i++) {
- size_t j;
-
- for (j = 0; environ[i][j] == ARCHLEVEL_ENV "="[j]; j++)
- if (environ[i][j] == '=')
- return (match_archlevel(&environ[i][j + 1], force));
- }
-
- *force = 0;
-
- return (X86_64_UNDEFINED);
-
-}
-
-/*
- * Determine the architecture level by checking the CPU capabilities
- * and the environment:
- *
- * 1. If environment variable ARCHLEVEL starts with a ! and is followed
- * by a valid architecture level, that level is returned.
- * 2. Else if ARCHLEVEL is set to a valid architecture level that is
- * supported by the CPU, that level is returned.
- * 3. Else the highest architecture level supported by the CPU is
- * returned.
- *
- * Valid architecture levels are those defined in the levels array.
- * The architecture level "scalar" indicates that SIMD enhancements
- * shall not be used.
- */
-static int
-archlevel(u_int feat_edx, u_int feat_ecx, u_int ext_ebx, u_int ext_ecx)
-{
- int islevel, wantlevel, hwlevel, force;
-
- islevel = atomic_load_int(&amd64_archlevel);
- if (islevel != X86_64_UNDEFINED)
- return (islevel);
-
- wantlevel = env_archlevel(&force);
- if (!force) {
- hwlevel = supported_archlevel(feat_edx, feat_ecx, ext_ebx, ext_ecx);
- if (wantlevel == X86_64_UNDEFINED || wantlevel > hwlevel)
- wantlevel = hwlevel;
- }
-
- /*
- * Ensure amd64_archlevel is set only once and
- * all calls agree on what it was set to.
- */
- if (atomic_cmpset_int(&amd64_archlevel, islevel, wantlevel))
- return (wantlevel);
- else
- return (atomic_load_int(&amd64_archlevel));
-}
-
-/*
- * Helper function for SIMD ifunc dispatch: select the highest level
- * implementation up to the current architecture level.
- */
-dlfunc_t
-__archlevel_resolve(u_int feat_edx, u_int feat_ecx, u_int ext_ebx,
- u_int ext_ecx, int32_t funcs[static X86_64_MAX + 1])
-{
- int level;
-
- for (level = archlevel(feat_edx, feat_ecx, ext_ebx, ext_ecx); level >= 0; level--)
- if (funcs[level] != 0)
- return (dlfunc_t)((uintptr_t)funcs + (ptrdiff_t)funcs[level]);
-
- /* no function is present -- what now? */
- __builtin_trap();
-}
diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
--- a/lib/libc/amd64/string/memcmp.S
+++ b/lib/libc/amd64/string/memcmp.S
@@ -1,12 +1,9 @@
/*-
- * Copyright (c) 2018, 2023 The FreeBSD Foundation
+ * Copyright (c) 2018 The FreeBSD Foundation
*
* This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
- * Portions of this software were developed by Robert Clausecker
- * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
- *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -30,10 +27,6 @@
*/
#include <machine/asm.h>
-#include <machine/param.h>
-
-#include "amd64_archlevel.h"
-
/*
* Note: this routine was written with kernel use in mind (read: no simd),
* it is only present in userspace as a temporary measure until something
@@ -43,15 +36,10 @@
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
#ifdef BCMP
-#define memcmp bcmp
+ENTRY(bcmp)
+#else
+ENTRY(memcmp)
#endif
-
-ARCHFUNCS(memcmp)
- ARCHFUNC(memcmp, scalar)
- ARCHFUNC(memcmp, baseline)
-ENDARCHFUNCS(memcmp)
-
-ARCHENTRY(memcmp, scalar)
xorl %eax,%eax
10:
cmpq $16,%rdx
@@ -169,6 +157,7 @@
1:
leal 1(%eax),%eax
ret
+END(bcmp)
#else
/*
* We need to compute the difference between strings.
@@ -241,165 +230,7 @@
2:
subl %r8d,%eax
ret
+END(memcmp)
#endif
-ARCHEND(memcmp, scalar)
-
-ARCHENTRY(memcmp, baseline)
- cmp $32, %rdx # enough to permit use of the long kernel?
- ja .Llong
-
- test %rdx, %rdx # zero bytes buffer?
- je .L0
-
- /*
- * Compare strings of 1--32 bytes. We want to do this by
- * loading into two xmm registers and then comparing. To avoid
- * crossing into unmapped pages, we either load 32 bytes from
- * the start of the buffer or 32 bytes before its end, depending
- * on whether there is a page boundary between the overread area
- * or not.
- */
-
- /* check for page boundaries overreads */
- lea 31(%rdi), %eax # end of overread
- lea 31(%rsi), %r8d
- lea -1(%rdi, %rdx, 1), %ecx # last character in buffer
- lea -1(%rsi, %rdx, 1), %r9d
- xor %ecx, %eax
- xor %r9d, %r8d
- test $PAGE_SIZE, %eax # are they on different pages?
- jz 0f
-
- /* fix up rdi */
- movdqu -32(%rdi, %rdx, 1), %xmm0
- movdqu -16(%rdi, %rdx, 1), %xmm1
- lea -8(%rsp), %rdi # end of replacement buffer
- sub %rdx, %rdi # start of replacement buffer
- movdqa %xmm0, -40(%rsp) # copy to replacement buffer
- movdqa %xmm1, -24(%rsp)
-
-0: test $PAGE_SIZE, %r8d
- jz 0f
-
- /* fix up rsi */
- movdqu -32(%rsi, %rdx, 1), %xmm0
- movdqu -16(%rsi, %rdx, 1), %xmm1
- lea -40(%rsp), %rsi # end of replacement buffer
- sub %rdx, %rsi # start of replacement buffer
- movdqa %xmm0, -72(%rsp) # copy to replacement buffer
- movdqa %xmm1, -56(%rsp)
-
- /* load data and compare properly */
-0: movdqu 16(%rdi), %xmm1
- movdqu 16(%rsi), %xmm3
- movdqu (%rdi), %xmm0
- movdqu (%rsi), %xmm2
- mov %edx, %ecx
- mov $-1, %edx
- shl %cl, %rdx # ones where the buffer is not
- pcmpeqb %xmm3, %xmm1
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm1, %ecx
- pmovmskb %xmm0, %eax
- shl $16, %ecx
- or %ecx, %eax # ones where the buffers match
- or %edx, %eax # including where the buffer is not
- not %eax # ones where there is a mismatch
-#ifndef BCMP
- bsf %eax, %edx # location of the first mismatch
- cmovz %eax, %edx # including if there is no mismatch
- movzbl (%rdi, %rdx, 1), %eax # mismatching bytes
- movzbl (%rsi, %rdx, 1), %edx
- sub %edx, %eax
-#endif
- ret
-
- /* empty input */
-.L0: xor %eax, %eax
- ret
-
- /* compare 33+ bytes */
- ALIGN_TEXT
-.Llong: movdqu (%rdi), %xmm0 # load head
- movdqu (%rsi), %xmm2
- mov %rdi, %rcx
- sub %rdi, %rsi # express rsi as distance from rdi
- and $~0xf, %rdi # align rdi to 16 bytes
- movdqu 16(%rsi, %rdi, 1), %xmm1
- pcmpeqb 16(%rdi), %xmm1 # compare second half of this iteration
- add %rcx, %rdx # pointer to last byte in buffer
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm0, %eax
- xor $0xffff, %eax # any mismatch?
- jne .Lmismatch_head
- add $64, %rdi # advance to next iteration
- jmp 1f # and get going with the loop
-
- /* process buffer 32 bytes at a time */
- ALIGN_TEXT
-0: movdqu -32(%rsi, %rdi, 1), %xmm0
- movdqu -16(%rsi, %rdi, 1), %xmm1
- pcmpeqb -32(%rdi), %xmm0
- pcmpeqb -16(%rdi), %xmm1
- add $32, %rdi # advance to next iteration
-1: pand %xmm0, %xmm1 # 0xff where both halves matched
- pmovmskb %xmm1, %eax
- cmp $0xffff, %eax # all bytes matched?
- jne .Lmismatch
- cmp %rdx, %rdi # end of buffer reached?
- jb 0b
-
- /* less than 32 bytes left to compare */
- movdqu -16(%rdx), %xmm1 # load 32 byte tail through end pointer
- movdqu -16(%rdx, %rsi, 1), %xmm3
- movdqu -32(%rdx), %xmm0
- movdqu -32(%rdx, %rsi, 1), %xmm2
- pcmpeqb %xmm3, %xmm1
- pcmpeqb %xmm2, %xmm0
- pmovmskb %xmm1, %ecx
- pmovmskb %xmm0, %eax
- shl $16, %ecx
- or %ecx, %eax # ones where the buffers match
- not %eax # ones where there is a mismatch
-#ifndef BCMP
- bsf %eax, %ecx # location of the first mismatch
- cmovz %eax, %ecx # including if there is no mismatch
- add %rcx, %rdx # pointer to potential mismatch
- movzbl -32(%rdx), %eax # mismatching bytes
- movzbl -32(%rdx, %rsi, 1), %edx
- sub %edx, %eax
-#endif
- ret
-
-#ifdef BCMP
-.Lmismatch:
- mov $1, %eax
-.Lmismatch_head:
- ret
-#else /* memcmp */
-.Lmismatch_head:
- tzcnt %eax, %eax # location of mismatch
- add %rax, %rcx # pointer to mismatch
- movzbl (%rcx), %eax # mismatching bytes
- movzbl (%rcx, %rsi, 1), %ecx
- sub %ecx, %eax
- ret
-
-.Lmismatch:
- movdqu -48(%rsi, %rdi, 1), %xmm1
- pcmpeqb -48(%rdi), %xmm1 # reconstruct xmm1 before PAND
- pmovmskb %xmm0, %eax # mismatches in first 16 bytes
- pmovmskb %xmm1, %edx # mismatches in second 16 bytes
- shl $16, %edx
- or %edx, %eax # mismatches in both
- not %eax # matches in both
- tzcnt %eax, %eax # location of mismatch
- add %rax, %rdi # pointer to mismatch
- movzbl -64(%rdi), %eax # mismatching bytes
- movzbl -64(%rdi, %rsi, 1), %ecx
- sub %ecx, %eax
- ret
-#endif
-ARCHEND(memcmp, baseline)
.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/stpcpy.S b/lib/libc/amd64/string/stpcpy.S
--- a/lib/libc/amd64/string/stpcpy.S
+++ b/lib/libc/amd64/string/stpcpy.S
@@ -1,30 +1,10 @@
-/*-
- * Copyright (c) 2023, The FreeBSD Foundation
- *
- * SPDX-License-Expression: BSD-2-Clause
- *
- * Portions of this software were developed by Robert Clausecker
- * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
- *
- * Adapted from NetBSD's common/lib/libc/arch/x86_64/string/strcpy.S
- * written by J.T. Conklin <jtc@acorntoolworks.com> and
- * adapted by Guillaume Morin <guillaume@morinfr.org> to implement stpcpy
- * that was originally dedicated to the public domain
+/*
+ * Adapted by Guillaume Morin <guillaume@morinfr.org> from strcpy.S
+ * written by J.T. Conklin <jtc@acorntoolworks.com>
+ * Public domain.
*/
#include <machine/asm.h>
-
-#include "amd64_archlevel.h"
-
-#define ALIGN_TEXT .p2align 4, 0x90
-
- .weak stpcpy
- .set stpcpy, __stpcpy
-ARCHFUNCS(__stpcpy)
- ARCHFUNC(__stpcpy, scalar)
- ARCHFUNC(__stpcpy, baseline)
-ENDARCHFUNCS(__stpcpy)
-
/*
* This stpcpy implementation copies a byte at a time until the
* source pointer is aligned to a word boundary, it then copies by
@@ -38,7 +18,9 @@
* requirements.
*/
-ARCHENTRY(__stpcpy, scalar)
+ .globl stpcpy,__stpcpy
+ENTRY(stpcpy)
+__stpcpy:
movabsq $0x0101010101010101,%r8
movabsq $0x8080808080808080,%r9
@@ -59,7 +41,7 @@
dec %rax
ret
- ALIGN_TEXT
+ .p2align 4
.Lloop:
movq %rdx,(%rdi)
addq $8,%rdi
@@ -127,111 +109,6 @@
.Ldone:
movq %rdi,%rax
ret
-ARCHEND(__stpcpy, scalar)
-
-ARCHENTRY(__stpcpy, baseline)
- mov %esi, %ecx
- mov %rdi, %rdx
- sub %rsi, %rdi # express destination as distance to surce
- and $~0xf, %rsi # align source to 16 byte
- movdqa (%rsi), %xmm0 # head of string with junk before
- pxor %xmm1, %xmm1
- and $0xf, %ecx # misalignment in bytes
- pcmpeqb %xmm1, %xmm0 # NUL byte present?
- pmovmskb %xmm0, %eax
- shr %cl, %eax # clear out matches in junk bytes
- bsf %eax, %eax # find match if any
- jnz .Lrunt
-
- /* first normal iteration: write head back if it succeeds */
- movdqa 16(%rsi), %xmm0 # 16 bytes of current iteration
- movdqu (%rsi, %rcx, 1), %xmm2 # first 16 bytes of the string
- pcmpeqb %xmm0, %xmm1 # NUL byte present?
- pmovmskb %xmm1, %eax
- test %eax, %eax # find match if any
- jnz .Lshorty
-
- movdqu %xmm2, (%rdx) # store beginning of string
-
- /* main loop, unrolled twice */
- ALIGN_TEXT
-0: movdqa 32(%rsi), %xmm2 # load current iteraion
- movdqu %xmm0, 16(%rsi, %rdi, 1) # write back previous iteraion
- pxor %xmm1, %xmm1
- add $32, %rsi
- pcmpeqb %xmm2, %xmm1 # NUL byte present?
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jnz 1f
-
- movdqa 16(%rsi), %xmm0 # load current iteraion
- movdqu %xmm2, (%rsi, %rdi, 1) # write back previous iteraion
- pxor %xmm1, %xmm1
- pcmpeqb %xmm0, %xmm1 # NUL byte present?
- pmovmskb %xmm1, %eax
- test %eax, %eax
- jz 0b
-
- /* end of string after main loop has iterated */
- add $16, %rsi # advance rsi to second unrolled half
-1: tzcnt %eax, %eax # find location of match
- # (behaves as bsf on pre-x86-64-v3 CPUs)
- add %rsi, %rax # point to NUL byte
- movdqu -15(%rax), %xmm0 # last 16 bytes of string
- movdqu %xmm0, -15(%rax, %rdi, 1) # copied to destination
- add %rdi, %rax # point to destination's NUL byte
- ret
-
- /* NUL encountered in second iteration */
-.Lshorty:
- tzcnt %eax, %eax
- add $16, %eax # account for length of first iteration
- sub %ecx, %eax # but not the parts before the string
-
- /* NUL encountered in first iteration */
-.Lrunt: lea 1(%rax), %edi # string length including NUL byte
- add %rcx, %rsi # point to beginning of string
- add %rdx, %rax # point to NUL byte
-
- /* transfer 16--32 bytes */
-.L1632: cmp $16, %edi
- jb .L0815
-
- movdqu -16(%rsi, %rdi, 1), %xmm0 # load last 16 bytes
- movdqu %xmm2, (%rdx) # store first 16 bytes
- movdqu %xmm0, -15(%rax) # store last 16 bytes
- ret
-
- /* transfer 8--15 bytes */
-.L0815: cmp $8, %edi
- jb .L0407
-
- mov (%rsi), %rcx # load first 8 bytes
- mov -8(%rsi, %rdi, 1), %rdi # load last 8 bytes
- mov %rcx, (%rdx) # store to dst
- mov %rdi, -7(%rax) # dito
- ret
-
- /* transfer 4--7 bytes */
-.L0407: cmp $4, %edi
- jb .L0203
-
- mov (%rsi), %ecx
- mov -4(%rsi, %rdi, 1), %edi
- mov %ecx, (%rdx)
- mov %edi, -3(%rax)
- ret
-
- /* transfer 2--3 bytes */
-.L0203: cmp $2, %edi
- jb .L0101
-
- movzwl (%rsi), %ecx
- mov %cx, (%rdx) # store first two bytes
-
- /* transfer 0 bytes (last byte is always NUL) */
-.L0101: movb $0, (%rax) # store terminating NUL byte
- ret
-ARCHEND(__stpcpy, baseline)
-
+END(stpcpy)
+
.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strchrnul.S b/lib/libc/amd64/string/strchrnul.S
deleted file mode 100644
--- a/lib/libc/amd64/string/strchrnul.S
+++ /dev/null
@@ -1,170 +0,0 @@
-/*-
- * Copyright (c) 2023 The FreeBSD Foundation
- *
- * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
- * under sponsorship from the FreeBSD Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE
- */
-
-#include <machine/asm.h>
-
-#include "amd64_archlevel.h"
-
-#define ALIGN_TEXT .p2align 4,0x90 # 16-byte alignment, nop-filled
-
- .weak strchrnul
- .set strchrnul, __strchrnul
-
-ARCHFUNCS(__strchrnul)
- ARCHFUNC(__strchrnul, scalar)
- ARCHFUNC(__strchrnul, baseline)
-ENDARCHFUNCS(__strchrnul)
-
-/*
- * strchrnul(str, c)
- * This is implemented like strlen(str), but we check for the
- * presence of both NUL and c in each iteration.
- */
-ARCHENTRY(__strchrnul, scalar)
- mov %edi, %ecx
- and $~7, %rdi # align to 8 byte
- movzbl %sil, %esi # clear stray high bits
- movabs $0x0101010101010101, %r8
- mov (%rdi), %rax # load first word
- imul %r8, %rsi # replicate char 8 times
-
- /*
- * Unaligned input: align to 8 bytes. Then proceed the same
- * way as with aligned input, but prevent matches before the
- * beginning of the string. This is achieved by oring 0x01
- * into each byte of the buffer before the string
- */
- shl $3, %ecx
- mov %r8, %r10
- add $8, %rdi
- shl %cl, %r10 # 0x01 where the string is
- xor %r8, %r10 # 0x01 where it is not
- neg %r8 # negate 01..01 so we can use lea
- movabs $0x8080808080808080, %r9
-
- mov %rsi, %rcx
- xor %rax, %rcx # str ^ c
- or %r10, %rax # str without NUL bytes before it
- or %r10, %rcx # (str ^ c) without matches before it
- lea (%rax, %r8, 1), %rdx # str - 0x01..01
- lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01
- not %rax # ~str
- not %rcx # ~(str ^ c)
- and %rdx, %rax # (str - 0x01..01) & ~str
- and %r11, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c)
- or %rcx, %rax # matches for both
- and %r9, %rax # not including junk bytes
- jnz 1f
-
- /* main loop unrolled twice */
- ALIGN_TEXT
-0: mov (%rdi), %rax # str
- mov %rsi, %rcx
- xor %rax, %rcx # str ^ c
- lea (%rax, %r8, 1), %rdx # str - 0x01..01
- lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01
- not %rax # ~str
- not %rcx # ~(str ^ c)
- and %rdx, %rax # (str - 0x01..01) & ~str
- and %r11, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c)
- or %rcx, %rax # matches for both
- and %r9, %rax # not including junk bits
- jnz 2f
-
- mov 8(%rdi), %rax # str
- add $16, %rdi
- mov %rsi, %rcx
- xor %rax, %rcx # str ^ c
- lea (%rax, %r8, 1), %rdx # str - 0x01..01
- lea (%rcx, %r8, 1), %r11 # (str ^ c) - 0x01..01
- not %rax # ~str
- not %rcx # ~(str ^ c)
- and %rdx, %rax # (str - 0x01..01) & ~str
- and %r11, %rcx # ((str ^ c - 0x01..01) & ~(str ^ c)
- or %rcx, %rax # matches for both
- and %r9, %rax # not including junk bits
- jz 0b
-
- /* NUL or c found */
-1: sub $8, %rdi # undo advance past buffer
-2: tzcnt %rax, %rax # first NUL or c byte match
- shr $3, %eax # scale from bit to byte index
- add %rdi, %rax # pointer to found c or NUL
- ret
-ARCHEND(__strchrnul, scalar)
-
-ARCHENTRY(__strchrnul, baseline)
- mov %edi, %ecx
- and $~0xf, %rdi # align to 16 byte
- movdqa (%rdi), %xmm1
- movd %esi, %xmm0
- and $0xf, %ecx # distance from (%rdi) to start of string
- pxor %xmm2, %xmm2
- mov $-1, %edx
- punpcklbw %xmm0, %xmm0 # c -> cc
- shl %cl, %edx # bits corresponding to bytes in the string
- punpcklwd %xmm0, %xmm0 # cc -> cccc
- add $16, %rdi
-
- /* check for match in head */
- pcmpeqb %xmm1, %xmm2 # NUL bytes present?
- pshufd $0, %xmm0, %xmm0 # cccc -> cccccccccccccccc
- pcmpeqb %xmm0, %xmm1 # c present?
- por %xmm2, %xmm1 # either present?
- pmovmskb %xmm1, %eax
- and %edx, %eax # match in the string?
- jnz 1f
-
- /* main loop unrolled twice */
- ALIGN_TEXT
-0: movdqa (%rdi), %xmm1
- pxor %xmm2, %xmm2
- pcmpeqb %xmm1, %xmm2 # NUL bytes present?
- pcmpeqb %xmm0, %xmm1 # c present?
- por %xmm2, %xmm1 # either present?
- pmovmskb %xmm1, %eax
- test %eax, %eax # match in the string?
- jnz 2f
-
- movdqa 16(%rdi), %xmm1
- add $32, %rdi
- pxor %xmm2, %xmm2
- pcmpeqb %xmm1, %xmm2 # NUL bytes present?
- pcmpeqb %xmm0, %xmm1 # c present?
- por %xmm2, %xmm1 # either present?
- pmovmskb %xmm1, %eax
- test %eax, %eax # match in the string?
- jz 0b
-
-1: sub $16, %rdi # undo advance past buffer
-2: tzcnt %eax, %eax # where is the match?
- add %rdi, %rax # pointer to found c or NUL
- ret
-ARCHEND(__strchrnul, baseline)
-
- .section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/amd64/string/strlen.S b/lib/libc/amd64/string/strlen.S
--- a/lib/libc/amd64/string/strlen.S
+++ b/lib/libc/amd64/string/strlen.S
@@ -1,15 +1,9 @@
-/*-
+/*
* Written by Mateusz Guzik <mjg@freebsd.org>
- * Copyright (c) 2023 The FreeBSD Foundation
- *
- * Portions of this software were developed by Robert Clausecker
- * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
- *
* Public domain.
*/
#include <machine/asm.h>
-#include "amd64_archlevel.h"
/*
* Note: this routine was written with kernel use in mind (read: no simd),
@@ -19,11 +13,6 @@
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
-ARCHFUNCS(strlen)
- ARCHFUNC(strlen, scalar)
- ARCHFUNC(strlen, baseline)
-ENDARCHFUNCS(strlen)
-
/*
* strlen(string)
* %rdi
@@ -40,7 +29,7 @@
*
* The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
*/
-ARCHENTRY(strlen, scalar)
+ENTRY(strlen)
movabsq $0xfefefefefefefeff,%r8
movabsq $0x8080808080808080,%r9
@@ -86,46 +75,6 @@
leaq (%rcx,%rdi),%rax
subq %r10,%rax
ret
-ARCHEND(strlen, scalar)
-
-ARCHENTRY(strlen, baseline)
- mov %rdi, %rcx
- pxor %xmm1, %xmm1
- and $~0xf, %rdi # align string
- pcmpeqb (%rdi), %xmm1 # compare head (with junk before string)
- mov %rcx, %rsi # string pointer copy for later
- and $0xf, %ecx # amount of bytes rdi is past 16 byte alignment
- pmovmskb %xmm1, %eax
- add $32, %rdi # advance to next iteration
- shr %cl, %eax # clear out matches in junk bytes
- test %eax, %eax # any match? (can't use ZF from SHR as CL=0 is possible)
- jnz 2f
-
- ALIGN_TEXT
-1: pxor %xmm1, %xmm1
- pcmpeqb -16(%rdi), %xmm1 # find NUL bytes
- pmovmskb %xmm1, %eax
- test %eax, %eax # were any NUL bytes present?
- jnz 3f
-
- /* the same unrolled once more */
- pxor %xmm1, %xmm1
- pcmpeqb (%rdi), %xmm1
- pmovmskb %xmm1, %eax
- add $32, %rdi # advance to next iteration
- test %eax, %eax
- jz 1b
-
- /* match found in loop body */
- sub $16, %rdi # undo half the advancement
-3: tzcnt %eax, %eax # find the first NUL byte
- sub %rsi, %rdi # string length until beginning of (%rdi)
- lea -16(%rdi, %rax, 1), %rax # that plus loc. of NUL byte: full string length
- ret
-
- /* match found in head */
-2: tzcnt %eax, %eax # compute string length
- ret
-ARCHEND(strlen, baseline)
+END(strlen)
.section .note.GNU-stack,"",%progbits
diff --git a/lib/libc/string/string.3 b/lib/libc/string/string.3
--- a/lib/libc/string/string.3
+++ b/lib/libc/string/string.3
@@ -29,7 +29,7 @@
.\"
.\" @(#)string.3 8.2 (Berkeley) 12/11/93
.\"
-.Dd September 2, 2023
+.Dd December 11, 1993
.Dt STRING 3
.Os
.Sh NAME
@@ -132,8 +132,7 @@
.Xr strsep 3 ,
.Xr strspn 3 ,
.Xr strstr 3 ,
-.Xr strtok 3 ,
-.Xr simd 7
+.Xr strtok 3
.Sh STANDARDS
The
.Fn strcat ,
diff --git a/share/man/man7/Makefile b/share/man/man7/Makefile
--- a/share/man/man7/Makefile
+++ b/share/man/man7/Makefile
@@ -25,7 +25,6 @@
release.7 \
sdoc.7 \
security.7 \
- simd.7 \
sizeof.7 \
sprog.7 \
stats.7 \
diff --git a/share/man/man7/arch.7 b/share/man/man7/arch.7
--- a/share/man/man7/arch.7
+++ b/share/man/man7/arch.7
@@ -24,7 +24,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd September 2, 2023
+.Dd April 12, 2023
.Dt ARCH 7
.Os
.Sh NAME
@@ -431,8 +431,7 @@
.El
.Sh SEE ALSO
.Xr src.conf 5 ,
-.Xr build 7 ,
-.Xr simd 7
+.Xr build 7
.Sh HISTORY
An
.Nm
diff --git a/share/man/man7/environ.7 b/share/man/man7/environ.7
--- a/share/man/man7/environ.7
+++ b/share/man/man7/environ.7
@@ -27,7 +27,7 @@
.\"
.\" @(#)environ.7 8.3 (Berkeley) 4/19/94
.\"
-.Dd September 3, 2023
+.Dd August 5, 2020
.Dt ENVIRON 7
.Os
.Sh NAME
@@ -66,13 +66,6 @@
section of the appropriate manual page.
.Sh ENVIRONMENT
.Bl -tag -width LD_LIBRARY_PATH
-.It Ev ARCHLEVEL
-On
-.Em amd64 ,
-controls the level of SIMD enhancements used.
-See
-.Xr simd 7
-for details.
.It Ev BLOCKSIZE
The size of the block units used by several disk-related commands,
most notably
@@ -312,8 +305,7 @@
.Xr setlocale 3 ,
.Xr system 3 ,
.Xr termcap 3 ,
-.Xr termcap 5 ,
-.Xr simd 7
+.Xr termcap 5
.Sh HISTORY
The
.Nm
diff --git a/share/man/man7/simd.7 b/share/man/man7/simd.7
deleted file mode 100644
--- a/share/man/man7/simd.7
+++ /dev/null
@@ -1,227 +0,0 @@
-.\" Copyright (c) 2023 The FreeBSD Foundation
-.
-.\" This documentation was written by Robert Clausecker <fuz@FreeBSD.org>
-.\" under sponsorship from the FreeBSD Foundation.
-.
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.
-.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ''AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE
-.
-.Dd August 13, 2023
-.Dt SIMD 7
-.Os
-.Sh NAME
-.Nm simd
-.Nd SIMD enhancements
-.
-.Sh DESCRIPTION
-On some architectures, the
-.Fx
-.Em libc
-provides enhanced implementations of commonly used functions, replacing
-the architecture-independent implementations used otherwise.
-Depending on architecture and function, an enhanced
-implementation of a function may either always be used or the
-.Em libc
-detects at runtime which SIMD instruction set extensions are
-supported and picks the most suitable implementation automatically.
-On
-.Cm amd64 ,
-the environment variable
-.Ev ARCHLEVEL
-can be used to override this mechanism.
-.Pp
-Enhanced functions are present in the following architectures:
-.Bl -column FUNCTION__ aarch64_ arm_ amd64_ i386_ ppc64_ -offset indent
-.It Em FUNCTION Ta Em AARCH64 Ta Em ARM Ta Em AMD64 Ta Em I386 Ta Em PPC64
-.It bcmp Ta Ta Ta S1 Ta S
-.It bcopy Ta Ta S Ta S Ta S Ta SV
-.It bzero Ta Ta S Ta S Ta S
-.It div Ta Ta Ta S Ta S
-.It index Ta S Ta Ta S1
-.It ldiv Ta Ta Ta S Ta S
-.It lldiv Ta Ta Ta S
-.It memcmp Ta Ta S Ta S1 Ta S
-.It memcpy Ta S Ta S Ta S Ta S Ta SV
-.It memmove Ta S Ta S Ta S Ta S Ta SV
-.It memset Ta Ta S Ta S Ta S
-.It rindex Ta S
-.It stpcpy Ta Ta Ta S1
-.It strcat Ta Ta Ta S Ta S
-.It strchr Ta S Ta Ta S1 Ta S
-.It strchrnul Ta Ta Ta S1
-.It strcmp Ta Ta S Ta S Ta S
-.It strcpy Ta Ta Ta S1 Ta S Ta S2
-.It strlen Ta Ta S Ta S1
-.It strncmp Ta Ta S Ta Ta S
-.It strncpy Ta Ta Ta Ta Ta S2
-.It strrchr Ta S Ta Ta Ta S
-.It swab Ta Ta Ta Ta S
-.It wcschr Ta Ta Ta Ta S
-.It wcscmp Ta Ta Ta Ta S
-.It wcslen Ta Ta Ta Ta S
-.It wmemchr Ta Ta Ta Ta S
-.El
-.Pp
-.Sy S Ns :\ scalar (non-SIMD),
-.Sy 1 Ns :\ amd64 baseline,
-.Sy 2 Ns :\ x86-64-v2
-or PowerPC\ 2.05,
-.Sy 3 Ns :\ x86-64-v3,
-.Sy 4 Ns :\ x86-64-v4,
-.Sy V Ns :\ PowerPC\ VSX.
-.
-.Sh ENVIRONMENT
-.Bl -tag
-.It Ev ARCHLEVEL
-On
-.Em amd64 ,
-controls the level of SIMD enhancements used.
-If this variable is set to an architecture level from the list below
-and that architecture level is supported by the processor, SIMD
-enhancements up to
-.Ev ARCHLEVEL
-are used.
-If
-.Ev ARCHLEVEL
-is unset, not recognised, or not supported by the processor, the highest
-level of SIMD enhancements supported by the processor is used.
-.Pp
-A suffix beginning with
-.Sq ":"
-or
-.Sq "+"
-in
-.Ev ARCHLEVEL
-is ignored and may be used for future extensions.
-The architecture level can be prefixed with a
-.Sq "!"
-character to force use of the requested architecture level, even if the
-processor does not advertise that it is supported.
-This usually causes applications to crash and should only be used for
-testing purposes or if architecture level detection yields incorrect
-results.
-.Pp
-The architecture levels follow the AMD64 SysV ABI supplement:
-.Bl -tag -width x86-64-v2
-.It Cm scalar
-scalar enhancements only (no SIMD)
-.It Cm baseline
-cmov, cx8, x87 FPU, fxsr, MMX, osfxsr, SSE, SSE2
-.It Cm x86-64-v2
-cx16, lahf/sahf, popcnt, SSE3, SSSE3, SSE4.1, SSE4.2
-.It Cm x86-64-v3
-AVX, AVX2, BMI1, BMI2, F16C, FMA, lzcnt, movbe, osxsave
-.It Cm x86-64-v4
-AVX-512F/BW/CD/DQ/VL
-.El
-.El
-.
-.Sh DIAGNOSTICS
-.Bl -diag
-.It "Illegal Instruction"
-Printed by
-.Xr sh 1
-if a command is terminated through delivery of a
-.Dv SIGILL
-signal, see
-.Xr signal 3 .
-.Pp
-Use of an unsupported architecture level was forced by setting
-.Ev ARCHLEVEL
-to a string beginning with a
-.Sq "!"
-character, causing a process to crash due to use of an unsupported
-instruction.
-Unset
-.Ev ARCHLEVEL ,
-remove the
-.Sq "!"
-prefix or select a supported architecture level.
-.Pp
-Message may also appear for unrelated reasons.
-.El
-.
-.Sh SEE ALSO
-.Xr string 3 ,
-.Xr arch 7
-.Rs
-.%A H. J. Lu
-.%A Michael Matz
-.%A Milind Girkar
-.%A Jan Hubi\[u010D]ka \" \(vc
-.%A Andreas Jaeger
-.%A Mark Mitchell
-.%B System V Application Binary Interface
-.%D May 23, 2023
-.%T AMD64 Architecture Processor Supplement
-.%O Version 1.0
-.Re
-.
-.Sh HISTORY
-Architecture-specific enhanced
-.Em libc
-functions were added starting
-with
-.Fx 2.0
-for
-.Cm i386 ,
-.Fx 6.0
-for
-.Cm arm ,
-.Fx 6.1
-for
-.Cm amd64 ,
-.Fx 11.0
-for
-.Cm aarch64 ,
-and
-.Fx 12.0
-for
-.Cm powerpc64 .
-SIMD-enhanced functions were first added with
-.Fx 13.0
-for
-.Cm powerpc64
-and with
-.Fx 14.0
-for
-.Cm amd64 .
-.Pp
-A
-.Nm
-manual page appeared in
-.Fx 14.0 .
-.
-.Sh AUTHOR
-.An Robert Clausecker Aq Mt fuz@FreeBSD.org
-.
-.Sh CAVEATS
-Other parts of
-.Fx
-such as cryptographic routines in the kernel or in
-OpenSSL may also use SIMD enhancements.
-These enhancements are not subject to the
-.Ev ARCHLEVEL
-variable and may have their own configuration
-mechanism.
-.
-.Sh BUGS
-Use of SIMD enhancements cannot be configured on powerpc64.

File Metadata

Mime Type
text/plain
Expires
Mon, May 4, 7:33 AM (2 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28552877
Default Alt Text
D41855.1777880039.diff (38 KB)

Event Timeline