Bug#1106761: bookworm-pu: package glibc/2.36-9+deb12u11 (2/2)
From
Aurelien Jarno@21:1/5 to
All on Thu May 29 15:50:01 2025
[continued from previous message]
++ str q0, [dstend2, -16]
++ str q0, [dstend, -16]
++ ret
++
++ .p2align 4
++L(set_16):
++ whilelo p0.b, xzr, count
++ st1b z0.b, p0, [dstin]
++ ret
++
++ .p2align 4
++L(set_128):
++ bic dst, dstin, 15
++ cmp count, 128
++ b.hi L(set_long)
++ stp q0, q0, [dstin]
++ stp q0, q0, [dstin, 32]
++ stp q0, q0, [dstend, -64]
++ stp q0, q0, [dstend, -32]
++ ret
++
++ .p2align 4
++L(set_long):
++ cmp count, 256
++ b.lo L(no_zva)
++ tst valw, 255
++ b.ne L(no_zva)
++
++ str q0, [dstin]
++ str q0, [dst, 16]
++ bic dst, dstin, 31
++ stp q0, q0, [dst, 32]
++ bic dst, dstin, 63
++ sub count, dstend, dst /* Count is now 64 too large. */
++ sub count, count, 128 /* Adjust count and bias for loop. */
++
++ sub x8, dstend, 1 /* Write last bytes before ZVA loop. */
++ bic x8, x8, 15
++ stp q0, q0, [x8, -48]
++ str q0, [x8, -16]
++ str q0, [dstend, -16]
++
++ .p2align 4
++L(zva64_loop):
++ add dst, dst, 64
++ dc zva, dst
++ subs count, count, 64
++ b.hi L(zva64_loop)
++ ret
++
++L(no_zva):
++ str q0, [dstin]
++ sub count, dstend, dst /* Count is 16 too large. */
++ sub count, count, 64 + 16 /* Adjust count and bias for loop. */
++L(no_zva_loop):
++ stp q0, q0, [dst, 16]
++ stp q0, q0, [dst, 48]
++ add dst, dst, 64
++ subs count, count, 64
++ b.hi L(no_zva_loop)
++ stp q0, q0, [dstend, -64]
++ stp q0, q0, [dstend, -32]
++ ret
++
++END (__memset_sve_zva64)
++#endif
diff --git a/sysdeps/aarch64/multiarch/memset_zva64.S b/sysdeps/aarch64/multiarch/memset_zva64.S
new file mode 100644
index 0000000000..13f45fd3d8
@@ -14006,10 +14619,17 @@ index 78d27b4aa6..6eeda12df6 100644
END (STRCPY)
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
-index 3a5d088407..10b9ec0769 100644
+index 3a5d088407..352fb40d3a 100644
--- a/sysdeps/aarch64/strlen.S
+++ b/sysdeps/aarch64/strlen.S
-@@ -43,12 +43,9 @@
+@@ -1,4 +1,5 @@
+-/* Copyright (C) 2012-2022 Free Software Foundation, Inc.
++/* Generic optimized strlen using SIMD.
++ Copyright (C) 2012-2024 Free Software Foundation, Inc.
+
+ This file is part of the GNU C Library.
+
+@@ -43,12 +44,9 @@
#define dend d2
/* Core algorithm:
@@ -14025,34 +14645,65 @@ index 3a5d088407..10b9ec0769 100644
ENTRY (STRLEN)
PTR_ARG (0)
-@@ -68,18 +65,25 @@ ENTRY (STRLEN)
+@@ -59,29 +57,50 @@ ENTRY (STRLEN)
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
+ fmov synd, dend
+ lsr synd, synd, shift
+- cbz synd, L(loop)
++ cbz synd, L(next16)
- .p2align 5
- L(loop):
+ rbit synd, synd
+ clz result, synd
+ lsr result, result, 2
+ ret
+
+- .p2align 5
+-L(loop):
- ldr data, [src, 16]!
++L(next16):
+ ldr data, [src, 16]
-+ cmeq vhas_nul.16b, vdata.16b, 0
-+ umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
-+ fmov synd, dend
-+ cbnz synd, L(loop_end)
-+ ldr data, [src, 32]!
cmeq vhas_nul.16b, vdata.16b, 0
- umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
+- umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
++ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov synd, dend
cbz synd, L(loop)
-
-+ sub src, src, 16
-+L(loop_end):
- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
+- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
++ add src, src, 16
++#ifndef __AARCH64EB__
++ rbit synd, synd
++#endif
sub result, src, srcin
++ clz tmp, synd
++ add result, result, tmp, lsr 2
++ ret
++
++ .p2align 5
++L(loop):
++ ldr data, [src, 32]!
++ cmeq vhas_nul.16b, vdata.16b, 0
++ addhn vend.8b, vhas_nul.8h, vhas_nul.8h
fmov synd, dend
++ cbnz synd, L(loop_end)
++ ldr data, [src, 16]
++ cmeq vhas_nul.16b, vdata.16b, 0
++ addhn vend.8b, vhas_nul.8h, vhas_nul.8h
++ fmov synd, dend
++ cbz synd, L(loop)
++ add src, src, 16
++L(loop_end):
++ sub result, shift, src, lsl 2 /* (srcin - src) << 2. */
#ifndef __AARCH64EB__
rbit synd, synd
++ sub result, result, 3
#endif
-+ add result, result, 16
clz tmp, synd
- add result, result, tmp, lsr 2
+- add result, result, tmp, lsr 2
++ sub result, tmp, result
++ lsr result, result, 2
ret
+
+ END (STRLEN)
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 282bddc9aa..a44a49a920 100644
--- a/sysdeps/aarch64/strnlen.S
@@ -14568,6 +15219,23 @@ index 0000000000..8f21ebe1b6
@@ -0,0 +1,2 @@
+#define UTMP_SIZE 384
+#define LASTLOG_SIZE 292
+diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h
+index a346fdca58..6c6cee511f 100644
+--- a/sysdeps/ieee754/dbl-64/math_config.h
++++ b/sysdeps/ieee754/dbl-64/math_config.h
+@@ -134,10 +134,11 @@ check_uflow (double x)
+ extern const struct exp_data
+ {
+ double invln2N;
+- double shift;
+ double negln2hiN;
+ double negln2loN;
+ double poly[4]; /* Last four coefficients. */
++ double shift;
++
+ double exp2_shift;
+ double exp2_poly[EXP2_POLY_ORDER];
+ uint64_t tab[2*(1 << EXP_TABLE_BITS)];
diff --git a/sysdeps/ieee754/dbl-64/s_expm1.c b/sysdeps/ieee754/dbl-64/s_expm1.c
index 8f1c95bd04..1cafeca9c0 100644
--- a/sysdeps/ieee754/dbl-64/s_expm1.c
@@ -14607,6 +15275,21 @@ index e6476a8260..eeb0af859f 100644
double
__log1p (double x)
{
+diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
+index c7f71ca496..6a52d1d51b 100644
+--- a/sysdeps/ieee754/flt-32/math_config.h
++++ b/sysdeps/ieee754/flt-32/math_config.h
+@@ -126,9 +126,9 @@ extern const struct exp2f_data
+ uint64_t tab[1 << EXP2F_TABLE_BITS];
+ double shift_scaled;
+ double poly[EXP2F_POLY_ORDER];
+- double shift;
+ double invln2_scaled;
+ double poly_scaled[EXP2F_POLY_ORDER];
++ double shift;
+ } __exp2f_data attribute_hidden;
+
+ #define LOGF_TABLE_BITS 4
diff --git a/sysdeps/ieee754/ldbl-128/e_j1l.c b/sysdeps/ieee754/ldbl-128/e_j1l.c
index 54c457681a..9a9c5c6f00 100644
--- a/sysdeps/ieee754/ldbl-128/e_j1l.c
--- SoupGate-Win32 v1.05
* Origin: fsxNet Usenet Gateway (21:1/5)