/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2024 Arm Limited
 */

#ifndef RTE_PTR_COMPRESS_H
#define RTE_PTR_COMPRESS_H

/**
 * @file
 * Pointer compression and decompression functions.
 *
 * When passing arrays full of pointers between threads, memory containing
 * the pointers is copied multiple times, which is especially costly between
 * cores. These functions allow us to compress the pointers.
 *
 * Compression takes advantage of the fact that pointers are usually located in
 * a limited memory region. We compress them by converting them to offsets from
 * a base memory address. Offsets can be stored in fewer bytes.
 *
 * The compression functions come in two varieties: 32-bit and 16-bit.
 *
 * To determine how many bits are needed to compress a pointer, calculate
 * the biggest offset possible (highest value pointer - base pointer)
 * and shift the value right according to alignment (shift by the exponent of
 * the power of 2 of the alignment: aligned by 4 - shift by 2, aligned by 8 -
 * shift by 3, etc.). The resulting value must fit in either 32 or 16 bits.
 * You may use the macros provided in this file to do it programmatically.
 *
 * For a usage example and further explanation please see this library's
 * documentation in the programming guide.
 */
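
/*
 * Worked example (illustrative figures, not from this file): objects kept
 * in a 1 GiB region need at most 30 offset bits. If every object is also
 * 8-byte aligned, the bottom 3 bits of each offset are always zero and can
 * be shifted out, leaving 27 significant bits, so the 32-bit functions
 * apply and the 16-bit ones do not.
 */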

#include <stdint.h>
#include <inttypes.h>
#include <limits.h> /* CHAR_BIT */

#include <rte_bitops.h>
#include <rte_branch_prediction.h>
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_vect.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Calculate how many bits are required to store pointers within a given memory
 * region as offsets. This can help decide which pointer compression functions
 * can be used.
 *
 * @param mem_length
 *   Length of the memory region the pointers are constrained to.
 * @return
 *   Number of bits required to store the largest offset within the region.
 **/
#define RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) \
	(((uint64_t)mem_length) < 2 ? 1 : \
		(sizeof(uint64_t) * CHAR_BIT - \
			rte_clz64((uint64_t)mem_length - 1)))
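
/*
 * For instance, with a 4 KiB region the largest possible offset is
 * 4095 (0xFFF):
 *
 *   uint32_t bits =
 *       RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(4096);
 *   // bits == 12
 */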

/**
 * Calculate how many bits in the address can be dropped without losing any
 * information, thanks to the alignment of the address.
 *
 * @param alignment
 *   Memory alignment.
 * @return
 *   Number of bits the pointer can be shifted right without dropping any
 *   information.
 **/
#define RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(alignment) \
	((alignment) == 0 ? 0 : rte_ctz64((uint64_t)alignment))
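
/*
 * For instance, 8-byte aligned objects always have their three lowest
 * address bits clear:
 *
 *   uint32_t shift = RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(8);
 *   // shift == 3
 */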

/**
 * Determine if rte_ptr_compress_16_shift can be used to compress pointers
 * that contain addresses of memory objects whose memory is aligned by
 * a given amount and contained in a given memory region.
 *
 * @param mem_length
 *   The length of the memory region that contains the objects pointed to.
 * @param obj_alignment
 *   The alignment of the objects pointed to.
 * @return
 *   1 if the function can be used, 0 otherwise.
 **/
#define RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT(mem_length, obj_alignment) \
	((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) - \
	RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) <= 16 ? 1 : 0)

/**
 * Determine if rte_ptr_compress_32_shift can be used to compress pointers
 * that contain addresses of memory objects whose memory is aligned by
 * a given amount and contained in a given memory region.
 *
 * @param mem_length
 *   The length of the memory region that contains the objects pointed to.
 * @param obj_alignment
 *   The alignment of the objects pointed to.
 * @return
 *   1 if the function can be used, 0 otherwise.
 **/
#define RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT(mem_length, obj_alignment) \
	((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) - \
	RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) <= 32 ? 1 : 0)
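
/*
 * Combined example (the sizes are illustrative): a 2 MiB region of 64-byte
 * aligned objects needs 21 - 6 = 15 offset bits, so both variants are
 * usable; without any alignment guarantee all 21 bits are needed and only
 * the 32-bit variant remains.
 *
 *   RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT(1 << 21, 64); // == 1
 *   RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT(1 << 21, 64); // == 1
 *   RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT(1 << 21, 1);  // == 0
 */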

/**
 * Compress pointers into 32-bit offsets from base pointer.
 *
 * @note It is the programmer's responsibility to ensure the resulting offsets
 * fit into 32 bits. Alignment of the structures pointed to by the pointers
 * allows us to drop bits from the offsets. This is controlled by the bit_shift
 * parameter. This means that if structures are aligned by 8 bytes they must be
 * within 32GB of the base pointer. If there is no such alignment guarantee
 * they must be within 4GB.
 *
 * @param ptr_base
 *   A pointer used to calculate offsets of pointers in src_table.
 * @param src_table
 *   A pointer to an array of pointers.
 * @param dest_table
 *   A pointer to an array of compressed pointers returned by this function.
 * @param n
 *   The number of objects to compress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widens the memory region
 *   that can be covered. This controls how many bits are right shifted.
 **/
static __rte_always_inline void
rte_ptr_compress_32_shift(void *ptr_base, void * const *src_table,
		uint32_t *dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
		v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
		v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
		svst1w(pg, &dest_table[i], v_ptr_table);
		i += svcntd();
	} while (i < n);
#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
	uintptr_t ptr_diff;
	uint64x2_t v_ptr_table;
	/* right shift is done by left shifting by a negative int */
	int64x2_t v_shift = vdupq_n_s64(-bit_shift);
	uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
	const size_t n_even = n & ~0x1;
	for (; i < n_even; i += 2) {
		v_ptr_table = vld1q_u64((const uint64_t *)src_table + i);
		v_ptr_table = vsubq_u64(v_ptr_table, v_ptr_base);
		v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
		vst1_u32(dest_table + i, vqmovn_u64(v_ptr_table));
	}
	/* process the leftover single item if n is odd */
	if (unlikely(n & 0x1)) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		dest_table[i] = (uint32_t) (ptr_diff >> bit_shift);
	}
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		ptr_diff = ptr_diff >> bit_shift;
		RTE_ASSERT(ptr_diff <= UINT32_MAX);
		dest_table[i] = (uint32_t) ptr_diff;
	}
#endif
}

/**
 * Decompress pointers from 32-bit offsets from base pointer.
 *
 * @param ptr_base
 *   A pointer which was used to calculate offsets in src_table.
 * @param src_table
 *   A pointer to an array of compressed pointers.
 * @param dest_table
 *   A pointer to an array of decompressed pointers returned by this function.
 * @param n
 *   The number of objects to decompress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widens the memory region
 *   that can be covered. This controls how many bits are left shifted when
 *   pointers are recovered from the offsets.
 **/
static __rte_always_inline void
rte_ptr_decompress_32_shift(void *ptr_base, uint32_t const *src_table,
		void **dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1uw_u64(pg, &src_table[i]);
		v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
		v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
		svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
		i += svcntd();
	} while (i < n);
#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
	uintptr_t ptr_diff;
	uint64x2_t v_ptr_table;
	int64x2_t v_shift = vdupq_n_s64(bit_shift);
	uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
	const size_t n_even = n & ~0x1;
	for (; i < n_even; i += 2) {
		v_ptr_table = vmovl_u32(vld1_u32(src_table + i));
		v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
		v_ptr_table = vaddq_u64(v_ptr_table, v_ptr_base);
		vst1q_u64((uint64_t *)dest_table + i, v_ptr_table);
	}
	/* process the leftover single item if n is odd */
	if (unlikely(n & 0x1)) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#endif
}
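
/*
 * Minimal round-trip sketch (hypothetical names; assumes every pointer in
 * ptrs targets an 8-byte aligned element of the objs array, so a bit_shift
 * of 3 is safe):
 *
 *   uint64_t objs[128];
 *   void *ptrs[32];            // filled with addresses of objs elements
 *   uint32_t compressed[32];
 *   void *restored[32];
 *
 *   rte_ptr_compress_32_shift(objs, ptrs, compressed, 32, 3);
 *   // ...pass the much smaller uint32_t array to another core...
 *   rte_ptr_decompress_32_shift(objs, compressed, restored, 32, 3);
 *   // now restored[i] == ptrs[i] for every i
 */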

/**
 * Compress pointers into 16-bit offsets from base pointer.
 *
 * @note It is the programmer's responsibility to ensure the resulting offsets
 * fit into 16 bits. Alignment of the structures pointed to by the pointers
 * allows us to drop bits from the offsets. This is controlled by the bit_shift
 * parameter. This means that if structures are aligned by 8 bytes they must be
 * within 512KB of the base pointer. If there is no such alignment guarantee
 * they must be within 64KB.
 *
 * @param ptr_base
 *   A pointer used to calculate offsets of pointers in src_table.
 * @param src_table
 *   A pointer to an array of pointers.
 * @param dest_table
 *   A pointer to an array of compressed pointers returned by this function.
 * @param n
 *   The number of objects to compress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widens the memory region
 *   that can be covered. This controls how many bits are right shifted.
 **/
static __rte_always_inline void
rte_ptr_compress_16_shift(void *ptr_base, void * const *src_table,
		uint16_t *dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
		v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
		v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
		svst1h(pg, &dest_table[i], v_ptr_table);
		i += svcntd();
	} while (i < n);
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		ptr_diff = ptr_diff >> bit_shift;
		RTE_ASSERT(ptr_diff <= UINT16_MAX);
		dest_table[i] = (uint16_t) ptr_diff;
	}
#endif
}

/**
 * Decompress pointers from 16-bit offsets from base pointer.
 *
 * @param ptr_base
 *   A pointer which was used to calculate offsets in src_table.
 * @param src_table
 *   A pointer to an array of compressed pointers.
 * @param dest_table
 *   A pointer to an array of decompressed pointers returned by this function.
 * @param n
 *   The number of objects to decompress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widens the memory region
 *   that can be covered. This controls how many bits are left shifted when
 *   pointers are recovered from the offsets.
 **/
static __rte_always_inline void
rte_ptr_decompress_16_shift(void *ptr_base, uint16_t const *src_table,
		void **dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1uh_u64(pg, &src_table[i]);
		v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
		v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
		svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
		i += svcntd();
	} while (i < n);
#else
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#endif
}
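
/*
 * Sketch of selecting a variant at run time (POOL_SIZE and OBJ_ALIGN are
 * hypothetical compile-time constants; the predicate macros call inline
 * bit-scan functions, so an ordinary if is used and the compiler can fold
 * it away):
 *
 *   if (RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT(POOL_SIZE, OBJ_ALIGN))
 *       rte_ptr_compress_16_shift(pool_base, ptrs, offsets16, n,
 *               RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(OBJ_ALIGN));
 *   else
 *       rte_ptr_compress_32_shift(pool_base, ptrs, offsets32, n,
 *               RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(OBJ_ALIGN));
 */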

#ifdef __cplusplus
}
#endif

#endif /* RTE_PTR_COMPRESS_H */