LCOV - code coverage report
Current view: top level - drivers/net/mlx5 - mlx5_rxtx_vec_sse.h (source / functions)
Test: Code coverage
Date: 2024-01-22 16:13:49
Coverage summary:          Hit    Total    Coverage
    Lines:                   0      262       0.0 %
    Functions:               0        3       0.0 %
    Branches:                0      128       0.0 %
Legend: Lines: hit / not hit | Branches: + taken, - not taken, # not executed

           Branch data     Line data    Source code
       1                 :            : /* SPDX-License-Identifier: BSD-3-Clause
       2                 :            :  * Copyright 2017 6WIND S.A.
       3                 :            :  * Copyright 2017 Mellanox Technologies, Ltd
       4                 :            :  */
       5                 :            : 
       6                 :            : #ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
       7                 :            : #define RTE_PMD_MLX5_RXTX_VEC_SSE_H_
       8                 :            : 
       9                 :            : #include <stdint.h>
      10                 :            : #include <string.h>
      11                 :            : #include <stdlib.h>
      12                 :            : #include <smmintrin.h>
      13                 :            : 
      14                 :            : #include <rte_mbuf.h>
      15                 :            : #include <rte_mempool.h>
      16                 :            : #include <rte_prefetch.h>
      17                 :            : 
      18                 :            : #include <mlx5_prm.h>
      19                 :            : 
      20                 :            : #include "mlx5_defs.h"
      21                 :            : #include "mlx5.h"
      22                 :            : #include "mlx5_utils.h"
      23                 :            : #include "mlx5_rxtx.h"
      24                 :            : #include "mlx5_rxtx_vec.h"
      25                 :            : #include "mlx5_autoconf.h"
      26                 :            : 
      27                 :            : #ifndef __INTEL_COMPILER
      28                 :            : #pragma GCC diagnostic ignored "-Wcast-qual"
      29                 :            : #endif
      30                 :            : 
      31                 :            : /**
       32                 :            :  * Copy mbuf pointers from the RX SW ring to the output packet array.
       33                 :            :  *
       34                 :            :  * @param elts
       35                 :            :  *   Pointer to the SW ring to copy mbuf pointers from.
       36                 :            :  * @param pkts
       37                 :            :  *   Pointer to the array of packets to be filled.
       38                 :            :  * @param n
       39                 :            :  *   Number of packets to be copied.
      40                 :            :  */
      41                 :            : static inline void
      42                 :            : rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
      43                 :            : {
      44                 :            :         unsigned int pos;
      45                 :          0 :         uint16_t p = n & -2;
      46                 :            : 
      47   [ #  #  #  # ]:          0 :         for (pos = 0; pos < p; pos += 2) {
      48                 :            :                 __m128i mbp;
      49                 :            : 
      50                 :          0 :                 mbp = _mm_loadu_si128((__m128i *)&elts[pos]);
      51                 :          0 :                 _mm_storeu_si128((__m128i *)&pkts[pos], mbp);
      52                 :            :         }
      53   [ #  #  #  # ]:          0 :         if (n & 1)
      54                 :          0 :                 pkts[pos] = elts[pos];
      55                 :            : }
      56                 :            : 
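/*
 * A minimal standalone sketch (not part of this file) of what the vector copy
 * above amounts to: assuming 64-bit mbuf pointers, each 128-bit load/store
 * moves two pointers at once and the scalar tail handles an odd count.
 * The helper name rxq_copy_mbuf_scalar is hypothetical.
 */
static inline void
rxq_copy_mbuf_scalar(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
{
        unsigned int i;

        /* Element-wise copy; the SSE version above moves two pointers per step. */
        for (i = 0; i < n; ++i)
                pkts[i] = elts[i];
}
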
      57                 :            : /**
      58                 :            :  * Decompress a compressed completion and fill in mbufs in RX SW ring with data
      59                 :            :  * extracted from the title completion descriptor.
      60                 :            :  *
      61                 :            :  * @param rxq
      62                 :            :  *   Pointer to RX queue structure.
      63                 :            :  * @param cq
      64                 :            :  *   Pointer to completion array having a compressed completion at first.
      65                 :            :  * @param elts
      66                 :            :  *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
      67                 :            :  *   the title completion descriptor to be copied to the rest of mbufs.
      68                 :            :  *
      69                 :            :  * @return
      70                 :            :  *   Number of mini-CQEs successfully decompressed.
      71                 :            :  */
      72                 :            : static inline uint16_t
      73                 :          0 : rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
      74                 :            :                     struct rte_mbuf **elts)
      75                 :            : {
      76         [ #  # ]:          0 :         volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + !rxq->cqe_comp_layout);
      77                 :            :         /* Title packet is pre-built. */
      78         [ #  # ]:          0 :         struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
      79                 :            :         unsigned int pos;
      80                 :            :         unsigned int i;
      81                 :            :         unsigned int inv = 0;
      82                 :            :         /* Mask to shuffle from extracted mini CQE to mbuf. */
      83                 :            :         const __m128i shuf_mask1 =
      84                 :            :                 _mm_set_epi8(0,  1,  2,  3, /* rss, bswap32 */
      85                 :            :                             -1, -1,         /* skip vlan_tci */
      86                 :            :                              6,  7,         /* data_len, bswap16 */
      87                 :            :                             -1, -1,  6,  7, /* pkt_len, bswap16 */
      88                 :            :                             -1, -1, -1, -1  /* skip packet_type */);
      89                 :            :         const __m128i shuf_mask2 =
      90                 :            :                 _mm_set_epi8(8,  9, 10, 11, /* rss, bswap32 */
      91                 :            :                             -1, -1,         /* skip vlan_tci */
      92                 :            :                             14, 15,         /* data_len, bswap16 */
      93                 :            :                             -1, -1, 14, 15, /* pkt_len, bswap16 */
      94                 :            :                             -1, -1, -1, -1  /* skip packet_type */);
      95                 :            :         /* Restore the compressed count. Must be 16 bits. */
      96         [ #  # ]:          0 :         uint16_t mcqe_n = (rxq->cqe_comp_layout) ?
      97                 :          0 :                 (MLX5_CQE_NUM_MINIS(cq->op_own) + 1) :
      98                 :          0 :                 t_pkt->data_len + (rxq->crc_present * RTE_ETHER_CRC_LEN);
      99                 :            :         uint16_t pkts_n = mcqe_n;
     100                 :            :         const __m128i rearm =
     101                 :            :                 _mm_loadu_si128((__m128i *)&t_pkt->rearm_data);
     102                 :            :         const __m128i rxdf =
     103                 :            :                 _mm_loadu_si128((__m128i *)&t_pkt->rx_descriptor_fields1);
     104                 :            :         const __m128i crc_adj =
     105                 :          0 :                 _mm_set_epi16(0, 0, 0,
     106                 :            :                               rxq->crc_present * RTE_ETHER_CRC_LEN,
     107                 :            :                               0,
     108                 :          0 :                               rxq->crc_present * RTE_ETHER_CRC_LEN,
     109                 :            :                               0, 0);
     110                 :            :         __m128i ol_flags = _mm_setzero_si128();
     111                 :            :         __m128i ol_flags_mask = _mm_setzero_si128();
     112                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     113                 :            :         const __m128i zero = _mm_setzero_si128();
     114                 :            :         const __m128i ones = _mm_cmpeq_epi32(zero, zero);
     115                 :            :         uint32_t rcvd_byte = 0;
     116                 :            :         /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
     117                 :            :         const __m128i len_shuf_mask =
     118                 :            :                 _mm_set_epi8(-1, -1, -1, -1,
     119                 :            :                              -1, -1, -1, -1,
     120                 :            :                              14, 15,  6,  7,
     121                 :            :                              10, 11,  2,  3);
     122                 :            : #endif
     123                 :            :         /*
     124                 :            :          * A. load mCQEs into a 128bit register.
     125                 :            :          * B. store rearm data to mbuf.
     126                 :            :          * C. combine data from mCQEs with rx_descriptor_fields1.
     127                 :            :          * D. store rx_descriptor_fields1.
     128                 :            :          * E. store flow tag (rte_flow mark).
     129                 :            :          */
     130                 :          0 : cycle:
     131         [ #  # ]:          0 :         if (rxq->cqe_comp_layout)
     132                 :          0 :                 rte_prefetch0((void *)(cq + mcqe_n));
     133         [ #  # ]:          0 :         for (pos = 0; pos < mcqe_n; ) {
     134                 :            :                 __m128i mcqe1, mcqe2;
     135                 :            :                 __m128i rxdf1, rxdf2;
     136                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     137                 :            :                 __m128i byte_cnt, invalid_mask;
     138                 :            : #endif
     139                 :            : 
     140         [ #  # ]:          0 :                 if (!rxq->cqe_comp_layout)
     141         [ #  # ]:          0 :                         for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
     142         [ #  # ]:          0 :                                 if (likely(pos + i < mcqe_n))
     143                 :          0 :                                         rte_prefetch0((void *)(cq + pos + i));
     144                 :            :                 /* A.1 load mCQEs into a 128bit register. */
     145         [ #  # ]:          0 :                 mcqe1 = _mm_loadu_si128((__m128i *)&mcq[pos % 8]);
     146                 :          0 :                 mcqe2 = _mm_loadu_si128((__m128i *)&mcq[pos % 8 + 2]);
     147                 :            :                 /* B.1 store rearm data to mbuf. */
     148         [ #  # ]:          0 :                 _mm_storeu_si128((__m128i *)&elts[pos]->rearm_data, rearm);
     149         [ #  # ]:          0 :                 _mm_storeu_si128((__m128i *)&elts[pos + 1]->rearm_data, rearm);
     150                 :            :                 /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
     151                 :            :                 rxdf1 = _mm_shuffle_epi8(mcqe1, shuf_mask1);
     152                 :            :                 rxdf2 = _mm_shuffle_epi8(mcqe1, shuf_mask2);
     153                 :            :                 rxdf1 = _mm_sub_epi16(rxdf1, crc_adj);
     154                 :            :                 rxdf2 = _mm_sub_epi16(rxdf2, crc_adj);
     155                 :            :                 rxdf1 = _mm_blend_epi16(rxdf1, rxdf, 0x23);
     156                 :            :                 rxdf2 = _mm_blend_epi16(rxdf2, rxdf, 0x23);
     157                 :            :                 /* D.1 store rx_descriptor_fields1. */
     158                 :            :                 _mm_storeu_si128((__m128i *)
     159                 :          0 :                                   &elts[pos]->rx_descriptor_fields1,
     160                 :            :                                  rxdf1);
     161                 :            :                 _mm_storeu_si128((__m128i *)
     162                 :          0 :                                   &elts[pos + 1]->rx_descriptor_fields1,
     163                 :            :                                  rxdf2);
     164                 :            :                 /* B.1 store rearm data to mbuf. */
     165                 :          0 :                 _mm_storeu_si128((__m128i *)&elts[pos + 2]->rearm_data, rearm);
     166                 :          0 :                 _mm_storeu_si128((__m128i *)&elts[pos + 3]->rearm_data, rearm);
     167                 :            :                 /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
     168                 :            :                 rxdf1 = _mm_shuffle_epi8(mcqe2, shuf_mask1);
     169                 :            :                 rxdf2 = _mm_shuffle_epi8(mcqe2, shuf_mask2);
     170                 :            :                 rxdf1 = _mm_sub_epi16(rxdf1, crc_adj);
     171                 :            :                 rxdf2 = _mm_sub_epi16(rxdf2, crc_adj);
     172                 :            :                 rxdf1 = _mm_blend_epi16(rxdf1, rxdf, 0x23);
     173                 :            :                 rxdf2 = _mm_blend_epi16(rxdf2, rxdf, 0x23);
     174                 :            :                 /* D.1 store rx_descriptor_fields1. */
     175                 :            :                 _mm_storeu_si128((__m128i *)
     176                 :          0 :                                   &elts[pos + 2]->rx_descriptor_fields1,
     177                 :            :                                  rxdf1);
     178                 :            :                 _mm_storeu_si128((__m128i *)
     179                 :          0 :                                   &elts[pos + 3]->rx_descriptor_fields1,
     180                 :            :                                  rxdf2);
     181                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     182                 :          0 :                 invalid_mask = _mm_set_epi64x(0,
     183                 :          0 :                                               (mcqe_n - pos) *
     184         [ #  # ]:          0 :                                               sizeof(uint16_t) * 8);
     185                 :            :                 invalid_mask = _mm_sll_epi64(ones, invalid_mask);
     186                 :            :                 byte_cnt = _mm_blend_epi16(_mm_srli_si128(mcqe1, 4),
     187                 :            :                                            mcqe2, 0xcc);
     188                 :            :                 byte_cnt = _mm_shuffle_epi8(byte_cnt, len_shuf_mask);
     189                 :            :                 byte_cnt = _mm_andnot_si128(invalid_mask, byte_cnt);
     190                 :            :                 byte_cnt = _mm_hadd_epi16(byte_cnt, zero);
     191                 :          0 :                 rcvd_byte += _mm_cvtsi128_si64(_mm_hadd_epi16(byte_cnt, zero));
     192                 :            : #endif
     193         [ #  # ]:          0 :                 if (rxq->mark) {
     194         [ #  # ]:          0 :                         if (rxq->mcqe_format !=
     195                 :            :                                 MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
     196                 :          0 :                                 const uint32_t flow_tag = t_pkt->hash.fdir.hi;
     197                 :            : 
     198                 :            :                                 /* E.1 store flow tag (rte_flow mark). */
     199                 :          0 :                                 elts[pos]->hash.fdir.hi = flow_tag;
     200                 :          0 :                                 elts[pos + 1]->hash.fdir.hi = flow_tag;
     201                 :          0 :                                 elts[pos + 2]->hash.fdir.hi = flow_tag;
     202                 :          0 :                                 elts[pos + 3]->hash.fdir.hi = flow_tag;
     203                 :            :                         } else {
     204                 :            :                                 const __m128i flow_mark_adj =
     205                 :            :                                         _mm_set_epi32(-1, -1, -1, -1);
     206                 :            :                                 const __m128i flow_mark_shuf =
     207                 :            :                                         _mm_set_epi8(-1,  9,  8, 12,
     208                 :            :                                                      -1,  1,  0,  4,
     209                 :            :                                                      -1, -1, -1, -1,
     210                 :            :                                                      -1, -1, -1, -1);
     211                 :            :                                 const __m128i ft_mask =
     212                 :            :                                         _mm_set1_epi32(0xffffff00);
     213                 :            :                                 const __m128i fdir_flags =
     214                 :            :                                         _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
     215                 :            :                                 const __m128i fdir_all_flags =
     216                 :          0 :                                         _mm_set1_epi32(RTE_MBUF_F_RX_FDIR |
     217                 :          0 :                                                        rxq->mark_flag);
     218                 :            :                                 __m128i fdir_id_flags =
     219                 :          0 :                                         _mm_set1_epi32(rxq->mark_flag);
     220                 :            : 
     221                 :            :                                 /* Extract flow_tag field. */
     222                 :            :                                 __m128i ftag0 =
     223                 :            :                                         _mm_shuffle_epi8(mcqe1, flow_mark_shuf);
     224                 :            :                                 __m128i ftag1 =
     225                 :            :                                         _mm_shuffle_epi8(mcqe2, flow_mark_shuf);
     226                 :            :                                 __m128i ftag =
     227                 :            :                                         _mm_unpackhi_epi64(ftag0, ftag1);
     228                 :            :                                 __m128i invalid_mask =
     229                 :            :                                         _mm_cmpeq_epi32(ftag, zero);
     230                 :            : 
     231                 :            :                                 ol_flags_mask = _mm_or_si128(ol_flags_mask,
     232                 :            :                                                              fdir_all_flags);
     233                 :            :                                 /* Set RTE_MBUF_F_RX_FDIR if flow tag is non-zero. */
     234                 :            :                                 ol_flags = _mm_or_si128(ol_flags,
     235                 :            :                                         _mm_andnot_si128(invalid_mask,
     236                 :            :                                                          fdir_flags));
     237                 :            :                                 /* Mask out invalid entries. */
     238                 :            :                                 fdir_id_flags = _mm_andnot_si128(invalid_mask,
     239                 :            :                                                                  fdir_id_flags);
     240                 :            :                                 /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
     241                 :            :                                 ol_flags = _mm_or_si128(ol_flags,
     242                 :            :                                         _mm_andnot_si128(_mm_cmpeq_epi32(ftag,
     243                 :            :                                                          ft_mask),
     244                 :            :                                         fdir_id_flags));
     245                 :            :                                 ftag = _mm_add_epi32(ftag, flow_mark_adj);
     246                 :          0 :                                 elts[pos]->hash.fdir.hi =
     247                 :          0 :                                                 _mm_extract_epi32(ftag, 0);
     248                 :          0 :                                 elts[pos + 1]->hash.fdir.hi =
     249                 :          0 :                                                 _mm_extract_epi32(ftag, 1);
     250                 :          0 :                                 elts[pos + 2]->hash.fdir.hi =
     251                 :          0 :                                                 _mm_extract_epi32(ftag, 2);
     252                 :          0 :                                 elts[pos + 3]->hash.fdir.hi =
     253                 :          0 :                                                 _mm_extract_epi32(ftag, 3);
     254                 :            :                         }
     255                 :            :                 }
     256         [ #  # ]:          0 :                 if (unlikely(rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)) {
     257         [ #  # ]:          0 :                         if (rxq->mcqe_format ==
     258                 :            :                             MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
     259                 :            :                                 const uint8_t pkt_info =
     260         [ #  # ]:          0 :                                         (cq->pkt_info & 0x3) << 6;
     261                 :            :                                 const uint8_t pkt_hdr0 =
     262                 :            :                                         _mm_extract_epi8(mcqe1, 0);
     263                 :            :                                 const uint8_t pkt_hdr1 =
     264                 :            :                                         _mm_extract_epi8(mcqe1, 8);
     265                 :            :                                 const uint8_t pkt_hdr2 =
     266                 :            :                                         _mm_extract_epi8(mcqe2, 0);
     267                 :            :                                 const uint8_t pkt_hdr3 =
     268                 :            :                                         _mm_extract_epi8(mcqe2, 8);
     269                 :            :                                 const __m128i vlan_mask =
     270                 :            :                                         _mm_set1_epi32(RTE_MBUF_F_RX_VLAN |
     271                 :            :                                                        RTE_MBUF_F_RX_VLAN_STRIPPED);
     272                 :            :                                 const __m128i cv_mask =
     273                 :            :                                         _mm_set1_epi32(MLX5_CQE_VLAN_STRIPPED);
     274                 :            :                                 const __m128i pkt_cv =
     275         [ #  # ]:          0 :                                         _mm_set_epi32(pkt_hdr0 & 0x1,
     276                 :            :                                                       pkt_hdr1 & 0x1,
     277                 :            :                                                       pkt_hdr2 & 0x1,
     278                 :            :                                                       pkt_hdr3 & 0x1);
     279                 :            : 
     280                 :            :                                 ol_flags_mask = _mm_or_si128(ol_flags_mask,
     281                 :            :                                                              vlan_mask);
     282                 :            :                                 ol_flags = _mm_or_si128(ol_flags,
     283                 :            :                                         _mm_and_si128(_mm_cmpeq_epi32(pkt_cv,
     284                 :            :                                         cv_mask), vlan_mask));
     285                 :          0 :                                 elts[pos]->packet_type =
     286                 :          0 :                                         mlx5_ptype_table[(pkt_hdr0 >> 2) |
     287                 :            :                                                          pkt_info];
     288                 :          0 :                                 elts[pos + 1]->packet_type =
     289                 :          0 :                                         mlx5_ptype_table[(pkt_hdr1 >> 2) |
     290                 :            :                                                          pkt_info];
     291                 :          0 :                                 elts[pos + 2]->packet_type =
     292                 :          0 :                                         mlx5_ptype_table[(pkt_hdr2 >> 2) |
     293                 :            :                                                          pkt_info];
     294                 :          0 :                                 elts[pos + 3]->packet_type =
     295                 :          0 :                                         mlx5_ptype_table[(pkt_hdr3 >> 2) |
     296                 :            :                                                          pkt_info];
     297         [ #  # ]:          0 :                                 if (rxq->tunnel) {
     298                 :          0 :                                         elts[pos]->packet_type |=
     299                 :          0 :                                                 !!(((pkt_hdr0 >> 2) |
     300                 :            :                                                 pkt_info) & (1 << 6));
     301                 :          0 :                                         elts[pos + 1]->packet_type |=
     302                 :          0 :                                                 !!(((pkt_hdr1 >> 2) |
     303                 :            :                                                 pkt_info) & (1 << 6));
     304                 :          0 :                                         elts[pos + 2]->packet_type |=
     305                 :          0 :                                                 !!(((pkt_hdr2 >> 2) |
     306                 :            :                                                 pkt_info) & (1 << 6));
     307                 :          0 :                                         elts[pos + 3]->packet_type |=
     308                 :          0 :                                                 !!(((pkt_hdr3 >> 2) |
     309                 :            :                                                 pkt_info) & (1 << 6));
     310                 :            :                                 }
     311                 :            :                         }
     312                 :            :                         const __m128i hash_flags =
     313                 :            :                                 _mm_set1_epi32(RTE_MBUF_F_RX_RSS_HASH);
     314                 :            :                         const __m128i rearm_flags =
     315                 :          0 :                                 _mm_set1_epi32((uint32_t)t_pkt->ol_flags);
     316                 :            : 
     317                 :            :                         ol_flags_mask = _mm_or_si128(ol_flags_mask, hash_flags);
     318                 :            :                         ol_flags = _mm_or_si128(ol_flags,
     319                 :            :                                 _mm_andnot_si128(ol_flags_mask, rearm_flags));
     320                 :          0 :                         elts[pos]->ol_flags =
     321                 :          0 :                                 _mm_extract_epi32(ol_flags, 0);
     322                 :          0 :                         elts[pos + 1]->ol_flags =
     323                 :          0 :                                 _mm_extract_epi32(ol_flags, 1);
     324                 :          0 :                         elts[pos + 2]->ol_flags =
     325                 :          0 :                                 _mm_extract_epi32(ol_flags, 2);
     326                 :          0 :                         elts[pos + 3]->ol_flags =
     327                 :          0 :                                 _mm_extract_epi32(ol_flags, 3);
     328                 :          0 :                         elts[pos]->hash.rss = 0;
     329                 :          0 :                         elts[pos + 1]->hash.rss = 0;
     330                 :          0 :                         elts[pos + 2]->hash.rss = 0;
     331                 :          0 :                         elts[pos + 3]->hash.rss = 0;
     332                 :            :                 }
     333         [ #  # ]:          0 :                 if (rxq->dynf_meta) {
     334                 :          0 :                         int32_t offs = rxq->flow_meta_offset;
     335                 :          0 :                         const uint32_t meta =
     336                 :          0 :                                 *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
     337                 :            : 
     338                 :            :                         /* Check if title packet has valid metadata. */
     339         [ #  # ]:          0 :                         if (meta) {
     340                 :            :                                 MLX5_ASSERT(t_pkt->ol_flags &
     341                 :            :                                             rxq->flow_meta_mask);
     342                 :          0 :                                 *RTE_MBUF_DYNFIELD(elts[pos], offs,
     343                 :          0 :                                                         uint32_t *) = meta;
     344                 :          0 :                                 *RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
     345                 :          0 :                                                         uint32_t *) = meta;
     346                 :          0 :                                 *RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
     347                 :          0 :                                                         uint32_t *) = meta;
     348                 :          0 :                                 *RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
     349                 :          0 :                                                         uint32_t *) = meta;
     350                 :            :                         }
     351                 :            :                 }
     352                 :          0 :                 pos += MLX5_VPMD_DESCS_PER_LOOP;
     353                 :            :                 /* Move to next CQE and invalidate consumed CQEs. */
     354         [ #  # ]:          0 :                 if (!rxq->cqe_comp_layout) {
     355   [ #  #  #  # ]:          0 :                         if (!(pos & 0x7) && pos < mcqe_n) {
     356         [ #  # ]:          0 :                                 if (pos + 8 < mcqe_n)
     357                 :          0 :                                         rte_prefetch0((void *)(cq + pos + 8));
     358                 :          0 :                                 mcq = (void *)(cq + pos);
     359         [ #  # ]:          0 :                                 for (i = 0; i < 8; ++i)
     360                 :          0 :                                         cq[inv++].op_own = MLX5_CQE_INVALIDATE;
     361                 :            :                         }
     362                 :            :                 }
     363                 :            :         }
     364         [ #  # ]:          0 :         if (rxq->cqe_comp_layout) {
     365                 :            :                 int ret;
     366                 :            :                 /* Keep unzipping if the next CQE is the miniCQE array. */
     367                 :          0 :                 cq = &cq[mcqe_n];
     368         [ #  # ]:          0 :                 ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci + pkts_n);
     369                 :          0 :                 if (ret == MLX5_CQE_STATUS_SW_OWN &&
     370         [ #  # ]:          0 :                     MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED) {
     371                 :            :                         pos = 0;
     372                 :          0 :                         elts = &elts[mcqe_n];
     373                 :            :                         mcq = (void *)cq;
     374                 :          0 :                         mcqe_n = MLX5_CQE_NUM_MINIS(cq->op_own) + 1;
     375                 :          0 :                         pkts_n += mcqe_n;
     376                 :          0 :                         goto cycle;
     377                 :            :                 }
     378                 :            :         } else {
     379                 :            :                 /* Invalidate the rest of CQEs. */
     380         [ #  # ]:          0 :                 for (; inv < pkts_n; ++inv)
     381                 :          0 :                         cq[inv].op_own = MLX5_CQE_INVALIDATE;
     382                 :            :         }
     383                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     384                 :          0 :         rxq->stats.ipackets += pkts_n;
     385                 :          0 :         rxq->stats.ibytes += rcvd_byte;
     386                 :            : #endif
     387                 :          0 :         return pkts_n;
     388                 :            : }
     389                 :            : 
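/*
 * A small self-contained sketch (not driver code) of the _mm_shuffle_epi8()
 * technique used by shuf_mask1/shuf_mask2 above: listing source byte indices
 * in reverse order lets a single pshufb gather a field and byte-swap it in one
 * step, while -1 mask entries zero the destination bytes. The helper name
 * below is hypothetical.
 */
static inline uint32_t
bswap32_with_pshufb(uint32_t x)
{
        /* Destination bytes 0..3 take source bytes 3..0; the rest are zeroed. */
        const __m128i swap_mask =
                _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1,
                             -1, -1, -1, -1,  0,  1,  2,  3);
        __m128i v = _mm_cvtsi32_si128((int)x);

        v = _mm_shuffle_epi8(v, swap_mask);
        return (uint32_t)_mm_cvtsi128_si32(v);
}
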
     390                 :            : /**
      391                 :            :  * Calculate the packet type and offload flags for each mbuf and store them.
     392                 :            :  *
     393                 :            :  * @param rxq
     394                 :            :  *   Pointer to RX queue structure.
     395                 :            :  * @param cqes[4]
      396                 :            :  *   Array of four 16-byte completions extracted from the original completion
     397                 :            :  *   descriptor.
     398                 :            :  * @param op_err
     399                 :            :  *   Opcode vector having responder error status. Each field is 4B.
     400                 :            :  * @param pkts
     401                 :            :  *   Pointer to array of packets to be filled.
     402                 :            :  */
     403                 :            : static inline void
     404                 :          0 : rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
     405                 :            :                          __m128i op_err, struct rte_mbuf **pkts)
     406                 :            : {
     407                 :            :         __m128i pinfo0, pinfo1;
     408                 :            :         __m128i pinfo, ptype;
     409                 :          0 :         __m128i ol_flags = _mm_set1_epi32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH |
     410         [ #  # ]:          0 :                                           rxq->hw_timestamp * rxq->timestamp_rx_flag);
     411                 :            :         __m128i cv_flags;
     412                 :            :         const __m128i zero = _mm_setzero_si128();
     413                 :            :         const __m128i ptype_mask = _mm_set1_epi32(0xfd06);
     414                 :            :         const __m128i ptype_ol_mask = _mm_set1_epi32(0x106);
     415                 :            :         const __m128i pinfo_mask = _mm_set1_epi32(0x3);
     416                 :            :         const __m128i cv_flag_sel =
     417                 :            :                 _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0,
     418                 :            :                              (uint8_t)((RTE_MBUF_F_RX_IP_CKSUM_GOOD |
     419                 :            :                                         RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
     420                 :            :                              0,
     421                 :            :                              (uint8_t)(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1),
     422                 :            :                              0,
     423                 :            :                              (uint8_t)(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1),
     424                 :            :                              (uint8_t)(RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED),
     425                 :            :                              0);
     426                 :            :         const __m128i cv_mask =
     427                 :            :                 _mm_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
     428                 :            :                                RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
     429                 :            :         const __m128i mbuf_init =
     430                 :            :                 _mm_load_si128((__m128i *)&rxq->mbuf_initializer);
     431                 :            :         __m128i rearm0, rearm1, rearm2, rearm3;
     432                 :            :         uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
     433                 :            : 
     434                 :            :         /* Extract pkt_info field. */
     435         [ #  # ]:          0 :         pinfo0 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
     436         [ #  # ]:          0 :         pinfo1 = _mm_unpacklo_epi32(cqes[2], cqes[3]);
     437                 :            :         pinfo = _mm_unpacklo_epi64(pinfo0, pinfo1);
     438                 :            :         /* Extract hdr_type_etc field. */
     439                 :            :         pinfo0 = _mm_unpackhi_epi32(cqes[0], cqes[1]);
     440                 :            :         pinfo1 = _mm_unpackhi_epi32(cqes[2], cqes[3]);
     441                 :            :         ptype = _mm_unpacklo_epi64(pinfo0, pinfo1);
     442         [ #  # ]:          0 :         if (rxq->mark) {
     443                 :            :                 const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
     444                 :            :                 const __m128i fdir_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
     445                 :          0 :                 __m128i fdir_id_flags = _mm_set1_epi32(rxq->mark_flag);
     446                 :            :                 __m128i flow_tag, invalid_mask;
     447                 :            : 
     448                 :            :                 flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
     449                 :            :                 /* Check if flow tag is non-zero then set RTE_MBUF_F_RX_FDIR. */
     450                 :            :                 invalid_mask = _mm_cmpeq_epi32(flow_tag, zero);
     451                 :            :                 ol_flags = _mm_or_si128(ol_flags,
     452                 :            :                                         _mm_andnot_si128(invalid_mask,
     453                 :            :                                                          fdir_flags));
     454                 :            :                 /* Mask out invalid entries. */
     455                 :            :                 fdir_id_flags = _mm_andnot_si128(invalid_mask, fdir_id_flags);
     456                 :            :                 /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
     457                 :            :                 ol_flags = _mm_or_si128(ol_flags,
     458                 :            :                                         _mm_andnot_si128(
     459                 :            :                                                 _mm_cmpeq_epi32(flow_tag,
     460                 :            :                                                                 pinfo_ft_mask),
     461                 :            :                                                 fdir_id_flags));
     462                 :            :         }
     463                 :            :         /*
     464                 :            :          * Merge the two fields to generate the following:
     465                 :            :          * bit[1]     = l3_ok
     466                 :            :          * bit[2]     = l4_ok
     467                 :            :          * bit[8]     = cv
     468                 :            :          * bit[11:10] = l3_hdr_type
     469                 :            :          * bit[14:12] = l4_hdr_type
     470                 :            :          * bit[15]    = ip_frag
     471                 :            :          * bit[16]    = tunneled
     472                 :            :          * bit[17]    = outer_l3_type
     473                 :            :          */
     474                 :            :         ptype = _mm_and_si128(ptype, ptype_mask);
     475                 :            :         pinfo = _mm_and_si128(pinfo, pinfo_mask);
     476                 :            :         pinfo = _mm_slli_epi32(pinfo, 16);
      477                 :            :         /* Merge the fields into pinfo for the ol_flags calculation. */
     478                 :            :         pinfo = _mm_or_si128(ptype, pinfo);
     479                 :            :         ptype = _mm_srli_epi32(pinfo, 10);
     480                 :            :         ptype = _mm_packs_epi32(ptype, zero);
     481                 :            :         /* Errored packets will have RTE_PTYPE_ALL_MASK. */
     482                 :            :         op_err = _mm_srli_epi16(op_err, 8);
     483                 :            :         ptype = _mm_or_si128(ptype, op_err);
     484                 :            :         pt_idx0 = _mm_extract_epi8(ptype, 0);
     485                 :            :         pt_idx1 = _mm_extract_epi8(ptype, 2);
     486                 :            :         pt_idx2 = _mm_extract_epi8(ptype, 4);
     487                 :            :         pt_idx3 = _mm_extract_epi8(ptype, 6);
     488                 :          0 :         pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
     489                 :          0 :                                !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
     490                 :          0 :         pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
     491                 :          0 :                                !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
     492                 :          0 :         pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
     493                 :          0 :                                !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
     494                 :          0 :         pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
     495                 :          0 :                                !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
     496                 :            :         /* Fill flags for checksum and VLAN. */
     497                 :            :         pinfo = _mm_and_si128(pinfo, ptype_ol_mask);
     498                 :            :         pinfo = _mm_shuffle_epi8(cv_flag_sel, pinfo);
     499                 :            :         /* Locate checksum flags at byte[2:1] and merge with VLAN flags. */
     500                 :            :         cv_flags = _mm_slli_epi32(pinfo, 9);
     501                 :            :         cv_flags = _mm_or_si128(pinfo, cv_flags);
     502                 :            :         /* Move back flags to start from byte[0]. */
     503                 :            :         cv_flags = _mm_srli_epi32(cv_flags, 8);
     504                 :            :         /* Mask out garbage bits. */
     505                 :            :         cv_flags = _mm_and_si128(cv_flags, cv_mask);
     506                 :            :         /* Merge to ol_flags. */
     507                 :            :         ol_flags = _mm_or_si128(ol_flags, cv_flags);
     508                 :            :         /* Merge mbuf_init and ol_flags. */
     509                 :            :         rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 8), 0x30);
     510                 :            :         rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 4), 0x30);
     511                 :            :         rearm2 = _mm_blend_epi16(mbuf_init, ol_flags, 0x30);
     512                 :            :         rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(ol_flags, 4), 0x30);
     513                 :            :         /* Write 8B rearm_data and 8B ol_flags. */
     514                 :            :         _mm_store_si128((__m128i *)&pkts[0]->rearm_data, rearm0);
     515                 :          0 :         _mm_store_si128((__m128i *)&pkts[1]->rearm_data, rearm1);
     516                 :          0 :         _mm_store_si128((__m128i *)&pkts[2]->rearm_data, rearm2);
     517                 :          0 :         _mm_store_si128((__m128i *)&pkts[3]->rearm_data, rearm3);
     518                 :          0 : }
     519                 :            : 
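/*
 * A standalone sketch (hypothetical helper, not part of the driver) of the
 * branchless pattern used above when filling packet_type: multiplying by
 * !!(idx & (1 << 6)) ORs in the tunnel packet type only when bit 6 of the
 * ptype table index is set, without any conditional branch.
 */
static inline uint32_t
ptype_or_tunnel(uint32_t base_ptype, uint8_t pt_idx, uint32_t tunnel_ptype)
{
        /* (!!x) is 0 or 1, so the product is either 0 or tunnel_ptype. */
        return base_ptype | (!!(pt_idx & (1 << 6)) * tunnel_ptype);
}
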
     520                 :            : /**
     521                 :            :  * Process a non-compressed completion and fill in mbufs in RX SW ring
     522                 :            :  * with data extracted from the title completion descriptor.
     523                 :            :  *
     524                 :            :  * @param rxq
     525                 :            :  *   Pointer to RX queue structure.
     526                 :            :  * @param cq
     527                 :            :  *   Pointer to completion array having a non-compressed completion at first.
     528                 :            :  * @param elts
     529                 :            :  *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from
     530                 :            :  *   the title completion descriptor to be copied to the rest of mbufs.
     531                 :            :  * @param[out] pkts
     532                 :            :  *   Array to store received packets.
     533                 :            :  * @param pkts_n
     534                 :            :  *   Maximum number of packets in array.
     535                 :            :  * @param[out] err
      536                 :            :  *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
      537                 :            :  *   one error packet to handle.
      538                 :            :  * @param[out] comp
      539                 :            :  *   Pointer to an index. Set it to the first compressed completion, if any.
     540                 :            :  *
     541                 :            :  * @return
     542                 :            :  *   Number of CQEs successfully processed.
     543                 :            :  */
     544                 :            : static inline uint16_t
     545                 :          0 : rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
     546                 :            :                  struct rte_mbuf **elts, struct rte_mbuf **pkts,
     547                 :            :                  uint16_t pkts_n, uint64_t *err, uint64_t *comp)
     548                 :            : {
     549                 :          0 :         const uint16_t q_n = 1 << rxq->cqe_n;
     550                 :          0 :         const uint16_t q_mask = q_n - 1;
     551                 :            :         unsigned int pos, adj;
     552                 :            :         uint64_t n = 0;
     553                 :            :         uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
     554                 :            :         uint16_t nocmp_n = 0;
     555                 :          0 :         const uint8_t vic = rxq->cq_ci >> rxq->cqe_n;
     556                 :          0 :         const uint8_t own = !(rxq->cq_ci & (q_mask + 1));
     557                 :            :         const __m128i vic_check = _mm_set1_epi64x(0x00ff000000ff0000LL);
      558                 :            :         const __m128i owner_check = _mm_set1_epi64x(0x0100000001000000LL);
     559                 :            :         const __m128i opcode_check = _mm_set1_epi64x(0xf0000000f0000000LL);
     560                 :            :         const __m128i format_check = _mm_set1_epi64x(0x0c0000000c000000LL);
     561                 :            :         const __m128i resp_err_check = _mm_set1_epi64x(0xe0000000e0000000LL);
     562                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     563                 :            :         uint32_t rcvd_byte = 0;
     564                 :            :         /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
     565                 :            :         const __m128i len_shuf_mask =
     566                 :            :                 _mm_set_epi8(-1, -1, -1, -1,
     567                 :            :                              -1, -1, -1, -1,
     568                 :            :                              12, 13,  8,  9,
     569                 :            :                               4,  5,  0,  1);
     570                 :            : #endif
     571                 :            :         const __m128i validity =
     572                 :          0 :                 _mm_set_epi8(0, vic, 0, 0,
     573                 :            :                              0, vic, 0, 0,
     574                 :            :                              0, vic, 0, 0,
     575                 :            :                              0, vic, 0, 0);
     576                 :            :         const __m128i ownership =
     577                 :          0 :                 _mm_set_epi8(own, 0, 0, 0,
     578                 :            :                              own, 0, 0, 0,
     579                 :            :                              own, 0, 0, 0,
     580                 :            :                              own, 0, 0, 0);
     581                 :            :         /* Mask to shuffle from extracted CQE to mbuf. */
     582                 :            :         const __m128i shuf_mask =
     583                 :            :                 _mm_set_epi8(-1,  3,  2,  1, /* fdir.hi */
     584                 :            :                              12, 13, 14, 15, /* rss, bswap32 */
     585                 :            :                              10, 11,         /* vlan_tci, bswap16 */
     586                 :            :                               4,  5,         /* data_len, bswap16 */
     587                 :            :                              -1, -1,         /* zero out 2nd half of pkt_len */
     588                 :            :                               4,  5          /* pkt_len, bswap16 */);
     589                 :            :         /* Mask to blend from the last Qword to the first DQword. */
     590                 :            :         const __m128i blend_mask =
     591                 :            :                 _mm_set_epi8(-1, -1, -1, -1,
     592                 :            :                              -1, -1, -1, -1,
     593                 :            :                               0,  0,  0,  0,
     594                 :            :                               0,  0,  0, -1);
     595                 :            :         const __m128i zero = _mm_setzero_si128();
     596                 :            :         const __m128i ones = _mm_cmpeq_epi32(zero, zero);
     597                 :            :         const __m128i crc_adj =
     598                 :          0 :                 _mm_set_epi16(0, 0, 0, 0, 0,
     599                 :            :                               rxq->crc_present * RTE_ETHER_CRC_LEN,
     600                 :            :                               0,
     601                 :          0 :                               rxq->crc_present * RTE_ETHER_CRC_LEN);
     602                 :          0 :         const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
     603                 :            :         /*
      604                 :            :          * A. load the first Qword (8 bytes) in one loop.
      605                 :            :          * B. copy 4 mbuf pointers from the elts ring to the returned pkts.
      606                 :            :          * C. load the remaining CQE data and extract the necessary fields.
      607                 :            :          *    The final 16-byte cqes[] extracted from the original 64-byte CQE has the
     608                 :            :          *    following structure:
     609                 :            :          *        struct {
     610                 :            :          *          uint8_t  pkt_info;
     611                 :            :          *          uint8_t  flow_tag[3];
     612                 :            :          *          uint16_t byte_cnt;
     613                 :            :          *          uint8_t  validity_iteration_count;
     614                 :            :          *          uint8_t  op_own;
     615                 :            :          *          uint16_t hdr_type_etc;
     616                 :            :          *          uint16_t vlan_info;
     617                 :            :          *          uint32_t rx_has_res;
     618                 :            :          *        } c;
     619                 :            :          * D. fill in mbuf.
     620                 :            :          * E. get valid CQEs.
     621                 :            :          * F. find compressed CQE.
     622                 :            :          */
     623                 :          0 :         for (pos = 0;
     624         [ #  # ]:          0 :              pos < pkts_n;
     625                 :          0 :              pos += MLX5_VPMD_DESCS_PER_LOOP) {
     626                 :            :                 __m128i cqes[MLX5_VPMD_DESCS_PER_LOOP];
     627                 :            :                 __m128i cqe_tmp1, cqe_tmp2;
     628                 :            :                 __m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
     629                 :            :                 __m128i op_own, op_own_tmp1, op_own_tmp2;
     630                 :            :                 __m128i opcode, owner_mask, invalid_mask;
     631                 :            :                 __m128i comp_mask, mini_mask;
     632                 :            :                 __m128i mask;
     633                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     634                 :            :                 __m128i byte_cnt;
     635                 :            : #endif
     636                 :            :                 __m128i mbp1, mbp2;
     637                 :            :                 __m128i p = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
     638                 :            :                 unsigned int p1, p2, p3;
     639                 :            : 
     640                 :            :                 /* Prefetch next 4 CQEs. */
     641         [ #  # ]:          0 :                 if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
     642                 :          0 :                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP]);
     643                 :          0 :                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 1]);
     644                 :          0 :                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 2]);
     645                 :          0 :                         rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 3]);
     646                 :            :                 }
     647                 :            :                 /* A.0 do not cross the end of CQ. */
     648                 :          0 :                 mask = _mm_set_epi64x(0, (pkts_n - pos) * sizeof(uint16_t) * 8);
     649                 :            :                 mask = _mm_sll_epi64(ones, mask);
     650                 :            :                 p = _mm_andnot_si128(mask, p);
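                         :            :                 /*
                         :            :                  * Shifting all-ones left by 16 * (pkts_n - pos) bits and
                         :            :                  * clearing those lanes of p forces any offset beyond the
                         :            :                  * remaining CQEs back to 0, so the loads below never read
                         :            :                  * past the requested range.
                         :            :                  */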
     651                 :            :                 /* A.1 load cqes. */
     652                 :          0 :                 p3 = _mm_extract_epi16(p, 3);
     653                 :          0 :                 cqes[3] = _mm_loadl_epi64((__m128i *)
     654                 :          0 :                                            &cq[pos + p3].sop_drop_qpn);
     655                 :          0 :                 rte_compiler_barrier();
     656                 :          0 :                 p2 = _mm_extract_epi16(p, 2);
     657                 :          0 :                 cqes[2] = _mm_loadl_epi64((__m128i *)
     658                 :          0 :                                            &cq[pos + p2].sop_drop_qpn);
     659                 :          0 :                 rte_compiler_barrier();
     660                 :            :                 /* B.1 load mbuf pointers. */
     661                 :          0 :                 mbp1 = _mm_loadu_si128((__m128i *)&elts[pos]);
     662                 :          0 :                 mbp2 = _mm_loadu_si128((__m128i *)&elts[pos + 2]);
     663                 :            :                 /* A.1 load a block having op_own. */
     664                 :          0 :                 p1 = _mm_extract_epi16(p, 1);
     665                 :          0 :                 cqes[1] = _mm_loadl_epi64((__m128i *)
     666                 :          0 :                                            &cq[pos + p1].sop_drop_qpn);
     667                 :          0 :                 rte_compiler_barrier();
     668                 :          0 :                 cqes[0] = _mm_loadl_epi64((__m128i *)
     669                 :          0 :                                            &cq[pos].sop_drop_qpn);
     670                 :            :                 /* B.2 copy mbuf pointers. */
     671                 :          0 :                 _mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
     672                 :          0 :                 _mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
     673                 :          0 :                 rte_io_rmb();
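                         :            :                 /*
                         :            :                  * The read barrier keeps the full-CQE loads below from
                         :            :                  * being reordered before the op_own Qword loads above.
                         :            :                  */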
     674                 :            :                 /* C.1 load the remaining CQE data and extract necessary fields. */
     675                 :            :                 cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
     676                 :            :                 cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
     677         [ #  # ]:          0 :                 cqes[3] = _mm_blendv_epi8(cqes[3], cqe_tmp2, blend_mask);
     678         [ #  # ]:          0 :                 cqes[2] = _mm_blendv_epi8(cqes[2], cqe_tmp1, blend_mask);
     679                 :            :                 cqe_tmp2 = _mm_loadu_si128((__m128i *)&cq[pos + p3].csum);
     680                 :            :                 cqe_tmp1 = _mm_loadu_si128((__m128i *)&cq[pos + p2].csum);
     681                 :          0 :                 cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x30);
     682         [ #  # ]:          0 :                 cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x30);
     683                 :            :                 cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p3].rsvd4[2]);
     684                 :            :                 cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos + p2].rsvd4[2]);
     685                 :          0 :                 cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x04);
     686         [ #  # ]:          0 :                 cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x04);
     687                 :            :                 /* C.2 generate final structure for mbuf by swapping bytes. */
     688                 :            :                 pkt_mb3 = _mm_shuffle_epi8(cqes[3], shuf_mask);
     689                 :            :                 pkt_mb2 = _mm_shuffle_epi8(cqes[2], shuf_mask);
     690                 :            :                 /* C.3 adjust CRC length. */
     691                 :            :                 pkt_mb3 = _mm_sub_epi16(pkt_mb3, crc_adj);
     692                 :            :                 pkt_mb2 = _mm_sub_epi16(pkt_mb2, crc_adj);
     693                 :            :                 /* C.4 adjust flow mark. */
     694                 :            :                 pkt_mb3 = _mm_add_epi32(pkt_mb3, flow_mark_adj);
     695                 :            :                 pkt_mb2 = _mm_add_epi32(pkt_mb2, flow_mark_adj);
     696                 :            :                 /* D.1 fill in mbuf - rx_descriptor_fields1. */
     697         [ #  # ]:          0 :                 _mm_storeu_si128((void *)&pkts[pos + 3]->pkt_len, pkt_mb3);
     698                 :          0 :                 _mm_storeu_si128((void *)&pkts[pos + 2]->pkt_len, pkt_mb2);
     699                 :            :                 /* E.1 extract op_own field. */
     700         [ #  # ]:          0 :                 op_own_tmp2 = _mm_unpacklo_epi32(cqes[2], cqes[3]);
     701                 :            :                 /* C.1 load the remaining CQE data and extract necessary fields. */
     702                 :            :                 cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p1]);
     703                 :            :                 cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos]);
     704                 :          0 :                 cqes[1] = _mm_blendv_epi8(cqes[1], cqe_tmp2, blend_mask);
     705                 :          0 :                 cqes[0] = _mm_blendv_epi8(cqes[0], cqe_tmp1, blend_mask);
     706                 :            :                 cqe_tmp2 = _mm_loadu_si128((__m128i *)&cq[pos + p1].csum);
     707                 :            :                 cqe_tmp1 = _mm_loadu_si128((__m128i *)&cq[pos].csum);
     708                 :            :                 cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x30);
     709                 :            :                 cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x30);
     710                 :            :                 cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p1].rsvd4[2]);
     711                 :            :                 cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos].rsvd4[2]);
     712                 :          0 :                 cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x04);
     713         [ #  # ]:          0 :                 cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x04);
     714                 :            :                 /* C.2 generate final structure for mbuf by swapping bytes. */
     715                 :            :                 pkt_mb1 = _mm_shuffle_epi8(cqes[1], shuf_mask);
     716                 :            :                 pkt_mb0 = _mm_shuffle_epi8(cqes[0], shuf_mask);
     717                 :            :                 /* C.3 adjust CRC length. */
     718                 :            :                 pkt_mb1 = _mm_sub_epi16(pkt_mb1, crc_adj);
     719                 :            :                 pkt_mb0 = _mm_sub_epi16(pkt_mb0, crc_adj);
     720                 :            :                 /* C.4 adjust flow mark. */
     721                 :            :                 pkt_mb1 = _mm_add_epi32(pkt_mb1, flow_mark_adj);
     722                 :            :                 pkt_mb0 = _mm_add_epi32(pkt_mb0, flow_mark_adj);
     723                 :            :                 /* E.1 extract op_own byte. */
     724                 :            :                 op_own_tmp1 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
     725                 :            :                 op_own = _mm_unpackhi_epi64(op_own_tmp1, op_own_tmp2);
     726                 :            :                 /* D.1 fill in mbuf - rx_descriptor_fields1. */
     727                 :          0 :                 _mm_storeu_si128((void *)&pkts[pos + 1]->pkt_len, pkt_mb1);
     728                 :          0 :                 _mm_storeu_si128((void *)&pkts[pos]->pkt_len, pkt_mb0);
     729                 :            :                 /* E.2 mask out CQEs belonging to HW. */
     730         [ #  # ]:          0 :                 if (rxq->cqe_comp_layout) {
     731                 :            :                         owner_mask = _mm_and_si128(op_own, vic_check);
     732                 :            :                         owner_mask = _mm_cmpeq_epi32(owner_mask, validity);
     733                 :            :                         owner_mask = _mm_xor_si128(owner_mask, ones);
     734                 :            :                 } else {
     735                 :            :                         owner_mask = _mm_and_si128(op_own, owner_check);
     736                 :            :                         owner_mask = _mm_cmpeq_epi32(owner_mask, ownership);
     737                 :            :                 }
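                         :            :                 /*
                         :            :                  * Lanes whose CQE is not yet valid come out as all-ones
                         :            :                  * in owner_mask and are merged into invalid_mask below;
                         :            :                  * with cqe_comp_layout the validity iteration count is
                         :            :                  * compared instead of the ownership bit.
                         :            :                  */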
     738                 :            :                 owner_mask = _mm_packs_epi32(owner_mask, zero);
     739                 :            :                 /* E.3 get mask for invalidated CQEs. */
     740                 :            :                 opcode = _mm_and_si128(op_own, opcode_check);
     741                 :            :                 invalid_mask = _mm_cmpeq_epi32(opcode_check, opcode);
     742                 :            :                 invalid_mask = _mm_packs_epi32(invalid_mask, zero);
     743                 :            :                 /* E.4 mask out entries beyond the boundary. */
     744                 :            :                 invalid_mask = _mm_or_si128(invalid_mask, mask);
     745                 :            :                 /* E.5 merge invalid_mask with invalid owner. */
     746                 :            :                 invalid_mask = _mm_or_si128(invalid_mask, owner_mask);
     747                 :            :                 /* F.1 find compressed CQE format. */
     748                 :            :                 comp_mask = _mm_and_si128(op_own, format_check);
     749                 :            :                 comp_mask = _mm_cmpeq_epi32(comp_mask, format_check);
     750                 :            :                 comp_mask = _mm_packs_epi32(comp_mask, zero);
     751                 :            :                 /* F.2 mask out invalid entries. */
     752                 :            :                 comp_mask = _mm_andnot_si128(invalid_mask, comp_mask);
     753                 :          0 :                 comp_idx = _mm_cvtsi128_si64(comp_mask);
     754                 :            :                 /* F.3 get the first compressed CQE. */
     755                 :            :                 comp_idx = comp_idx ?
     756                 :          0 :                                 rte_ctz64(comp_idx) /
     757         [ #  # ]:          0 :                                         (sizeof(uint16_t) * 8) :
     758                 :            :                                 MLX5_VPMD_DESCS_PER_LOOP;
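                         :            :                 /*
                         :            :                  * After _mm_packs_epi32() each CQE occupies one 16-bit
                         :            :                  * lane, so the trailing-zero count divided by 16 is the
                         :            :                  * position of the first compressed CQE, or
                         :            :                  * MLX5_VPMD_DESCS_PER_LOOP if none.
                         :            :                  */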
     759                 :            :                 /* E.6 mask out entries after the compressed CQE. */
     760         [ #  # ]:          0 :                 mask = _mm_set_epi64x(0, comp_idx * sizeof(uint16_t) * 8);
     761                 :            :                 mask = _mm_sll_epi64(ones, mask);
     762                 :            :                 invalid_mask = _mm_or_si128(invalid_mask, mask);
     763                 :            :                 /* E.7 count non-compressed valid CQEs. */
     764                 :          0 :                 n = _mm_cvtsi128_si64(invalid_mask);
     765         [ #  # ]:          0 :                 n = n ? rte_ctz64(n) / (sizeof(uint16_t) * 8) :
     766                 :            :                         MLX5_VPMD_DESCS_PER_LOOP;
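                         :            :                 /*
                         :            :                  * n is the number of leading valid, non-compressed CQEs
                         :            :                  * in this batch; invalid_mask already folds in the
                         :            :                  * boundary, owner/validity and compression checks above.
                         :            :                  */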
     767                 :          0 :                 nocmp_n += n;
     768                 :            :                 /* D.2 get the final invalid mask. */
     769         [ #  # ]:          0 :                 mask = _mm_set_epi64x(0, n * sizeof(uint16_t) * 8);
     770                 :            :                 mask = _mm_sll_epi64(ones, mask);
     771                 :            :                 invalid_mask = _mm_or_si128(invalid_mask, mask);
     772                 :            :                 /* D.3 check error in opcode. */
     773                 :            :                 adj = (!rxq->cqe_comp_layout &&
     774   [ #  #  #  # ]:          0 :                        comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
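                         :            :                 /*
                         :            :                  * adj is non-zero only when, in the legacy layout, a
                         :            :                  * compressed CQE immediately follows the valid ones; the
                         :            :                  * shift below then leaves that lane unmasked so its
                         :            :                  * opcode is error-checked too.
                         :            :                  */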
     775                 :          0 :                 mask = _mm_set_epi64x(0, adj * sizeof(uint16_t) * 8);
     776                 :            :                 mini_mask = _mm_sll_epi64(invalid_mask, mask);
     777                 :            :                 opcode = _mm_cmpeq_epi32(resp_err_check, opcode);
     778                 :            :                 opcode = _mm_packs_epi32(opcode, zero);
     779                 :            :                 opcode = _mm_andnot_si128(mini_mask, opcode);
     780                 :            :                 /* D.4 mark if any error is set. */
     781                 :          0 :                 *err |= _mm_cvtsi128_si64(opcode);
     782                 :            :                 /* D.5 fill in mbuf - rearm_data and packet_type. */
     783                 :          0 :                 rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
     784         [ #  # ]:          0 :                 if (unlikely(rxq->shared)) {
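                         :            :                         /*
                         :            :                          * For a shared Rx queue, the receiving port of
                         :            :                          * each packet is taken from the CQE's
                         :            :                          * user_index_low field.
                         :            :                          */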
     785                 :          0 :                         pkts[pos]->port = cq[pos].user_index_low;
     786                 :          0 :                         pkts[pos + p1]->port = cq[pos + p1].user_index_low;
     787                 :          0 :                         pkts[pos + p2]->port = cq[pos + p2].user_index_low;
     788                 :          0 :                         pkts[pos + p3]->port = cq[pos + p3].user_index_low;
     789                 :            :                 }
     790         [ #  # ]:          0 :                 if (unlikely(rxq->hw_timestamp)) {
     791                 :          0 :                         int offset = rxq->timestamp_offset;
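                         :            :                         /*
                         :            :                          * With rt_timestamp the raw CQE clock is converted
                         :            :                          * via mlx5_txpp_convert_rx_ts() before being
                         :            :                          * stored; otherwise the big-endian CQE timestamp
                         :            :                          * is stored after byte swapping only.
                         :            :                          */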
     792         [ #  # ]:          0 :                         if (rxq->rt_timestamp) {
     793                 :            :                                 struct mlx5_dev_ctx_shared *sh = rxq->sh;
     794                 :            :                                 uint64_t ts;
     795                 :            : 
     796                 :          0 :                                 ts = rte_be_to_cpu_64(cq[pos].timestamp);
     797                 :          0 :                                 mlx5_timestamp_set(pkts[pos], offset,
     798                 :            :                                         mlx5_txpp_convert_rx_ts(sh, ts));
     799                 :          0 :                                 ts = rte_be_to_cpu_64(cq[pos + p1].timestamp);
     800                 :          0 :                                 mlx5_timestamp_set(pkts[pos + 1], offset,
     801                 :            :                                         mlx5_txpp_convert_rx_ts(sh, ts));
     802                 :          0 :                                 ts = rte_be_to_cpu_64(cq[pos + p2].timestamp);
     803                 :          0 :                                 mlx5_timestamp_set(pkts[pos + 2], offset,
     804                 :            :                                         mlx5_txpp_convert_rx_ts(sh, ts));
     805                 :          0 :                                 ts = rte_be_to_cpu_64(cq[pos + p3].timestamp);
     806                 :          0 :                                 mlx5_timestamp_set(pkts[pos + 3], offset,
     807                 :            :                                         mlx5_txpp_convert_rx_ts(sh, ts));
     808                 :            :                         } else {
     809                 :          0 :                                 mlx5_timestamp_set(pkts[pos], offset,
     810                 :          0 :                                         rte_be_to_cpu_64(cq[pos].timestamp));
     811                 :          0 :                                 mlx5_timestamp_set(pkts[pos + 1], offset,
     812                 :          0 :                                         rte_be_to_cpu_64(cq[pos + p1].timestamp));
     813                 :          0 :                                 mlx5_timestamp_set(pkts[pos + 2], offset,
     814                 :          0 :                                         rte_be_to_cpu_64(cq[pos + p2].timestamp));
     815                 :          0 :                                 mlx5_timestamp_set(pkts[pos + 3], offset,
     816                 :          0 :                                         rte_be_to_cpu_64(cq[pos + p3].timestamp));
     817                 :            :                         }
     818                 :            :                 }
     819         [ #  # ]:          0 :                 if (rxq->dynf_meta) {
     820                 :            :                         /* This code is subject to further optimization. */
     821                 :          0 :                         int32_t offs = rxq->flow_meta_offset;
     822                 :          0 :                         uint32_t mask = rxq->flow_meta_port_mask;
     823                 :            : 
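                         :            :                         /*
                         :            :                          * Copy the 32-bit flow metadata from each CQE into
                         :            :                          * the mbuf dynamic field; ol_flags is marked only
                         :            :                          * when the masked value is non-zero.
                         :            :                          */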
     824                 :          0 :                         *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
     825                 :          0 :                                 rte_be_to_cpu_32
     826                 :          0 :                                 (cq[pos].flow_table_metadata) &     mask;
     827                 :          0 :                         *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
     828                 :          0 :                                 rte_be_to_cpu_32
     829                 :          0 :                                 (cq[pos + p1].flow_table_metadata) & mask;
     830                 :          0 :                         *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
     831                 :          0 :                                 rte_be_to_cpu_32
     832                 :          0 :                                 (cq[pos + p2].flow_table_metadata) & mask;
     833                 :          0 :                         *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
     834                 :          0 :                                 rte_be_to_cpu_32
     835                 :          0 :                                 (cq[pos + p3].flow_table_metadata) & mask;
     836         [ #  # ]:          0 :                         if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
     837                 :          0 :                                 pkts[pos]->ol_flags |= rxq->flow_meta_mask;
     838         [ #  # ]:          0 :                         if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
     839                 :          0 :                                 pkts[pos + 1]->ol_flags |= rxq->flow_meta_mask;
     840         [ #  # ]:          0 :                         if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *))
     841                 :          0 :                                 pkts[pos + 2]->ol_flags |= rxq->flow_meta_mask;
     842         [ #  # ]:          0 :                         if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *))
     843                 :          0 :                                 pkts[pos + 3]->ol_flags |= rxq->flow_meta_mask;
     844                 :            :                 }
     845                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     846                 :            :                 /* Add up the received byte count. */
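                         :            :                 /*
                         :            :                  * len_shuf_mask gathers the per-CQE byte counts from
                         :            :                  * op_own into the low 16-bit lanes; invalid lanes are
                         :            :                  * cleared and two horizontal adds collapse them into a
                         :            :                  * single total.
                         :            :                  */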
     847                 :            :                 byte_cnt = _mm_shuffle_epi8(op_own, len_shuf_mask);
     848                 :            :                 byte_cnt = _mm_andnot_si128(invalid_mask, byte_cnt);
     849                 :            :                 byte_cnt = _mm_hadd_epi16(byte_cnt, zero);
     850                 :          0 :                 rcvd_byte += _mm_cvtsi128_si64(_mm_hadd_epi16(byte_cnt, zero));
     851                 :            : #endif
     852                 :            :                 /*
     853                 :            :                  * Break the loop unless more valid CQEs are expected, or if
     854                 :            :                  * there's a compressed CQE.
     855                 :            :                  */
     856         [ #  # ]:          0 :                 if (n != MLX5_VPMD_DESCS_PER_LOOP)
     857                 :            :                         break;
     858                 :            :         }
     859                 :            : #ifdef MLX5_PMD_SOFT_COUNTERS
     860                 :          0 :         rxq->stats.ipackets += nocmp_n;
     861                 :          0 :         rxq->stats.ibytes += rcvd_byte;
     862                 :            : #endif
     863         [ #  # ]:          0 :         if (comp_idx == n)
     864                 :          0 :                 *comp = comp_idx;
     865                 :          0 :         return nocmp_n;
     866                 :            : }
     867                 :            : 
     868                 :            : #endif /* RTE_PMD_MLX5_RXTX_VEC_SSE_H_ */

Generated by: LCOV version 1.14