LCOV - code coverage report
Current view: top level - lib/ptr_compress - rte_ptr_compress.h (source / functions) Hit Total Coverage
Test: Code coverage Lines: 14 14 100.0 %
Date: 2025-02-01 18:54:23 Functions: 0 0 -
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 8 16 50.0 %

           Branch data     Line data    Source code
       1                 :            : /* SPDX-License-Identifier: BSD-3-Clause
       2                 :            :  * Copyright(c) 2024 Arm Limited
       3                 :            :  */
       4                 :            : 
       5                 :            : #ifndef RTE_PTR_COMPRESS_H
       6                 :            : #define RTE_PTR_COMPRESS_H
       7                 :            : 
       8                 :            : /**
       9                 :            :  * @file
      10                 :            :  * Pointer compression and decompression functions.
      11                 :            :  *
      12                 :            :  * When passing arrays full of pointers between threads, memory containing
      13                 :            :  * the pointers is copied multiple times which is especially costly between
      14                 :            :  * cores. These functions allow us to compress the pointers.
      15                 :            :  *
      16                 :            :  * Compression takes advantage of the fact that pointers are usually located in
      17                 :            :  * a limited memory region. We compress them by converting them to offsets from
      18                 :            :  * a base memory address. Offsets can be stored in fewer bytes.
      19                 :            :  *
      20                 :            :  * The compression functions come in two varieties: 32-bit and 16-bit.
      21                 :            :  *
      22                 :            :  * To determine how many bits are needed to compress the pointer, calculate
      23                 :            :  * the biggest offset possible (highest value pointer - base pointer)
      24                 :            :  * and shift the value right according to alignment (shift by exponent of the
      25                 :            :  * power of 2 of alignment: aligned by 4 - shift by 2, aligned by 8 - shift by
      26                 :            :  * 3, etc.). The resulting value must fit in either 32 or 16 bits. You may
      27                 :            :  * use the macros provided in this file to do it programmatically.
      28                 :            :  *
      29                 :            :  * For usage example and further explanation please see this library's
      30                 :            :  * documentation in the programming guide.
      31                 :            :  */
      32                 :            : 
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>

#include <rte_bitops.h>
#include <rte_branch_prediction.h>
#include <rte_common.h>
#include <rte_debug.h>
#include <rte_vect.h>
      41                 :            : 
      42                 :            : #ifdef __cplusplus
      43                 :            : extern "C" {
      44                 :            : #endif
      45                 :            : 
      46                 :            : /**
      47                 :            :  * Calculate how many bits are required to store pointers within a given memory
      48                 :            :  * region as offsets. This can help decide which pointer compression functions
      49                 :            :  * can be used.
      50                 :            :  *
      51                 :            :  * @param mem_length
      52                 :            :  *   Length of the memory region the pointers are constrained to.
      53                 :            :  * @return
      54                 :            :  *   Number of bits required to store a value.
      55                 :            :  **/
      56                 :            : #define RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) \
      57                 :            :         (((uint64_t)mem_length) < 2 ? 1 : \
      58                 :            :                 (sizeof(uint64_t) * CHAR_BIT - \
      59                 :            :                  rte_clz64((uint64_t)mem_length - 1)))
      60                 :            : 
      61                 :            : /**
      62                 :            :  * Calculate how many bits in the address can be dropped without losing any
      63                 :            :  * information thanks to the alignment of the address.
      64                 :            :  *
      65                 :            :  * @param alignment
      66                 :            :  *   Memory alignment.
      67                 :            :  * @return
      68                 :            :  *   Size of shift allowed without dropping any information from the pointer.
      69                 :            :  **/
      70                 :            : #define RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(alignment) \
      71                 :            :         ((alignment) == 0 ? 0 : rte_ctz64((uint64_t)alignment))
      72                 :            : 
      73                 :            : /**
      74                 :            :  * Determine if rte_ptr_compress_16_shift can be used to compress pointers
      75                 :            :  * that contain addresses of memory objects whose memory is aligned by
      76                 :            :  * a given amount and contained in a given memory region.
      77                 :            :  *
      78                 :            :  * @param mem_length
      79                 :            :  *   The length of the memory region that contains the objects pointed to.
      80                 :            :  * @param obj_alignment
      81                 :            :  *   The alignment of objects pointed to.
      82                 :            :  * @return
      83                 :            :  *   1 if function can be used, 0 otherwise.
      84                 :            :  **/
      85                 :            : #define RTE_PTR_COMPRESS_CAN_COMPRESS_16_SHIFT(mem_length, obj_alignment) \
      86                 :            :         ((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) - \
      87                 :            :         RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) <= 16 ? 1 : 0)
      88                 :            : 
      89                 :            : /**
      90                 :            :  * Determine if rte_ptr_compress_32_shift can be used to compress pointers
      91                 :            :  * that contain addresses of memory objects whose memory is aligned by
      92                 :            :  * a given amount and contained in a given memory region.
      93                 :            :  *
      94                 :            :  * @param mem_length
      95                 :            :  *   The length of the memory region that contains the objects pointed to.
      96                 :            :  * @param obj_alignment
      97                 :            :  *   The alignment of objects pointed to.
      98                 :            :  * @return
      99                 :            :  *   1 if function can be used, 0 otherwise.
     100                 :            :  **/
     101                 :            : #define RTE_PTR_COMPRESS_CAN_COMPRESS_32_SHIFT(mem_length, obj_alignment) \
     102                 :            :         ((RTE_PTR_COMPRESS_BITS_NEEDED_FOR_POINTER_WITHIN_RANGE(mem_length) - \
     103                 :            :         RTE_PTR_COMPRESS_BIT_SHIFT_FROM_ALIGNMENT(obj_alignment)) <= 32 ? 1 : 0)
     104                 :            : 
/**
 * Compress pointers into 32-bit offsets from base pointer.
 *
 * @note It is programmer's responsibility to ensure the resulting offsets fit
 * into 32 bits. Alignment of the structures pointed to by the pointers allows
 * us to drop bits from the offsets. This is controlled by the bit_shift
 * parameter. This means that if structures are aligned by 8 bytes they must be
 * within 32GB of the base pointer. If there is no such alignment guarantee they
 * must be within 4GB.
 *
 * @param ptr_base
 *   A pointer used to calculate offsets of pointers in src_table.
 * @param src_table
 *   A pointer to an array of pointers.
 * @param dest_table
 *   A pointer to an array of compressed pointers returned by this function.
 * @param n
 *   The number of objects to compress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widen the memory region that
 *   can be covered. This controls how many bits are right shifted.
 **/
static __rte_always_inline void
rte_ptr_compress_32_shift(void *ptr_base, void * const *src_table,
		uint32_t *dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	/* SVE path: predicated do/while handles svcntd() 64-bit lanes per
	 * iteration; svwhilelt_b64 masks lanes beyond n, so no scalar tail
	 * is needed. The do/while shape relies on the documented n > 0.
	 */
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
		v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
		v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
		/* svst1w stores only the low 32 bits of each 64-bit lane */
		svst1w(pg, &dest_table[i], v_ptr_table);
		i += svcntd();
	} while (i < n);
#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
	/* NEON path: two pointers per iteration, plus a scalar tail for odd n */
	uintptr_t ptr_diff;
	uint64x2_t v_ptr_table;
	/* right shift is done by left shifting by negative int */
	int64x2_t v_shift = vdupq_n_s64(-bit_shift);
	uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
	const size_t n_even = n & ~0x1;
	for (; i < n_even; i += 2) {
		v_ptr_table = vld1q_u64((const uint64_t *)src_table + i);
		v_ptr_table = vsubq_u64(v_ptr_table, v_ptr_base);
		v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
		/* NOTE(review): vqmovn_u64 narrows with unsigned saturation,
		 * so an offset wider than 32 bits clamps to UINT32_MAX here
		 * while the scalar path truncates; irrelevant when the caller
		 * honors the "offsets must fit in 32 bits" contract above.
		 */
		vst1_u32(dest_table + i, vqmovn_u64(v_ptr_table));
	}
	/* process leftover single item in case of odd number of n */
	if (unlikely(n & 0x1)) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		dest_table[i] = (uint32_t) (ptr_diff >> bit_shift);
	}
#else
	/* portable scalar path: offset = (ptr - base) >> bit_shift */
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		ptr_diff = ptr_diff >> bit_shift;
		/* debug-build-only check that the shifted offset fits */
		RTE_ASSERT(ptr_diff <= UINT32_MAX);
		dest_table[i] = (uint32_t) ptr_diff;
	}
#endif
}
     171                 :            : 
/**
 * Decompress pointers from 32-bit offsets from base pointer.
 *
 * @param ptr_base
 *   A pointer which was used to calculate offsets in src_table.
 * @param src_table
 *   A pointer to an array to compressed pointers.
 * @param dest_table
 *   A pointer to an array of decompressed pointers returned by this function.
 * @param n
 *   The number of objects to decompress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widen the memory region that
 *   can be covered. This controls how many bits are left shifted when pointers
 *   are recovered from the offsets.
 **/
static __rte_always_inline void
rte_ptr_decompress_32_shift(void *ptr_base, uint32_t const *src_table,
		void **dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	/* SVE path: svld1uw zero-extends each 32-bit offset to 64 bits,
	 * then ptr = base + (offset << bit_shift). Predication covers the
	 * tail; the do/while shape relies on the documented n > 0.
	 */
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1uw_u64(pg, &src_table[i]);
		v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
		v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
		svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
		i += svcntd();
	} while (i < n);
#elif defined __ARM_NEON && !defined RTE_ARCH_ARMv8_AARCH32
	/* NEON path: vmovl_u32 widens two 32-bit offsets to 64 bits,
	 * then shift left and add the base; scalar tail for odd n.
	 */
	uintptr_t ptr_diff;
	uint64x2_t v_ptr_table;
	int64x2_t v_shift = vdupq_n_s64(bit_shift);
	uint64x2_t v_ptr_base = vdupq_n_u64((uint64_t)ptr_base);
	const size_t n_even = n & ~0x1;
	for (; i < n_even; i += 2) {
		v_ptr_table = vmovl_u32(vld1_u32(src_table + i));
		v_ptr_table = vshlq_u64(v_ptr_table, v_shift);
		v_ptr_table = vaddq_u64(v_ptr_table, v_ptr_base);
		vst1q_u64((uint64_t *)dest_table + i, v_ptr_table);
	}
	/* process leftover single item in case of odd number of n */
	if (unlikely(n & 0x1)) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#else
	/* portable scalar path: ptr = base + (offset << bit_shift) */
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#endif
}
     229                 :            : 
/**
 * Compress pointers into 16-bit offsets from base pointer.
 *
 * @note It is programmer's responsibility to ensure the resulting offsets fit
 * into 16 bits. Alignment of the structures pointed to by the pointers allows
 * us to drop bits from the offsets. This is controlled by the bit_shift
 * parameter. This means that if structures are aligned by 8 bytes they must be
 * within 256KB of the base pointer. If there is no such alignment guarantee
 * they must be within 64KB.
 *
 * @param ptr_base
 *   A pointer used to calculate offsets of pointers in src_table.
 * @param src_table
 *   A pointer to an array of pointers.
 * @param dest_table
 *   A pointer to an array of compressed pointers returned by this function.
 * @param n
 *   The number of objects to compress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widen the memory region that
 *   can be covered. This controls how many bits are right shifted.
 **/
static __rte_always_inline void
rte_ptr_compress_16_shift(void *ptr_base, void * const *src_table,
		uint16_t *dest_table, size_t n, uint8_t bit_shift)
{
	/* only SVE and scalar paths here — unlike the 32-bit variant,
	 * no NEON path is provided for the 16-bit narrowing
	 */
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	/* SVE path: predicated loop; svwhilelt_b64 masks lanes past n,
	 * and the do/while shape relies on the documented n > 0.
	 */
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1_u64(pg, (uint64_t *)src_table + i);
		v_ptr_table = svsub_x(pg, v_ptr_table, (uint64_t)ptr_base);
		v_ptr_table = svlsr_x(pg, v_ptr_table, bit_shift);
		/* svst1h stores only the low 16 bits of each 64-bit lane */
		svst1h(pg, &dest_table[i], v_ptr_table);
		i += svcntd();
	} while (i < n);
#else
	/* portable scalar path: offset = (ptr - base) >> bit_shift */
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = RTE_PTR_DIFF(src_table[i], ptr_base);
		ptr_diff = ptr_diff >> bit_shift;
		/* debug-build-only check that the shifted offset fits */
		RTE_ASSERT(ptr_diff <= UINT16_MAX);
		dest_table[i] = (uint16_t) ptr_diff;
	}
#endif
}
     279                 :            : 
/**
 * Decompress pointers from 16-bit offsets from base pointer.
 *
 * @param ptr_base
 *   A pointer which was used to calculate offsets in src_table.
 * @param src_table
 *   A pointer to an array to compressed pointers.
 * @param dest_table
 *   A pointer to an array of decompressed pointers returned by this function.
 * @param n
 *   The number of objects to decompress, must be strictly positive.
 * @param bit_shift
 *   Byte alignment of memory pointed to by the pointers allows for
 *   bits to be dropped from the offset and hence widen the memory region that
 *   can be covered. This controls how many bits are left shifted when pointers
 *   are recovered from the offsets.
 **/
static __rte_always_inline void
rte_ptr_decompress_16_shift(void *ptr_base, uint16_t const *src_table,
		void **dest_table, size_t n, uint8_t bit_shift)
{
	size_t i = 0;
#if defined RTE_HAS_SVE_ACLE && !defined RTE_ARCH_ARMv8_AARCH32
	/* SVE path: svld1uh zero-extends each 16-bit offset to 64 bits,
	 * then ptr = base + (offset << bit_shift). Predication covers the
	 * tail; the do/while shape relies on the documented n > 0.
	 */
	svuint64_t v_ptr_table;
	do {
		svbool_t pg = svwhilelt_b64(i, n);
		v_ptr_table = svld1uh_u64(pg, &src_table[i]);
		v_ptr_table = svlsl_x(pg, v_ptr_table, bit_shift);
		v_ptr_table = svadd_x(pg, v_ptr_table, (uint64_t)ptr_base);
		svst1(pg, (uint64_t *)dest_table + i, v_ptr_table);
		i += svcntd();
	} while (i < n);
#else
	/* portable scalar path: ptr = base + (offset << bit_shift) */
	uintptr_t ptr_diff;
	for (; i < n; i++) {
		ptr_diff = ((uintptr_t) src_table[i]) << bit_shift;
		dest_table[i] = RTE_PTR_ADD(ptr_base, ptr_diff);
	}
#endif
}
     320                 :            : 
     321                 :            : #ifdef __cplusplus
     322                 :            : }
     323                 :            : #endif
     324                 :            : 
     325                 :            : #endif /* RTE_PTR_COMPRESS_H */

Generated by: LCOV version 1.14