Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright 2017 6WIND S.A.
3 : : * Copyright 2017 Mellanox Technologies, Ltd
4 : : */
5 : :
6 : : #ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
7 : : #define RTE_PMD_MLX5_RXTX_VEC_SSE_H_
8 : :
9 : : #include <stdint.h>
10 : : #include <string.h>
11 : : #include <stdlib.h>
12 : : #include <smmintrin.h>
13 : :
14 : : #include <rte_mbuf.h>
15 : : #include <rte_mempool.h>
16 : : #include <rte_prefetch.h>
17 : :
18 : : #include <mlx5_prm.h>
19 : :
20 : : #include "mlx5_defs.h"
21 : : #include "mlx5.h"
22 : : #include "mlx5_utils.h"
23 : : #include "mlx5_rxtx.h"
24 : : #include "mlx5_rxtx_vec.h"
25 : : #include "mlx5_autoconf.h"
26 : :
27 : : #ifndef __INTEL_COMPILER
28 : : #pragma GCC diagnostic ignored "-Wcast-qual"
29 : : #endif
30 : :
31 : : /**
32 : : * Copy mbuf pointers from the RX SW ring to the output packet array.
33 : : *
34 : : * @param elts
35 : : * Pointer to the SW ring to copy the mbuf pointers from.
36 : : * @param pkts
37 : : * Pointer to the array of packets to be filled.
38 : : * @param n
39 : : * Number of mbuf pointers to be copied.
40 : : */
41 : : static inline void
42 : : rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
43 : : {
44 : : unsigned int pos;
45 : 0 : uint16_t p = n & -2;
46 : :
47 [ # # # # ]: 0 : for (pos = 0; pos < p; pos += 2) {
48 : : __m128i mbp;
49 : :
50 : 0 : mbp = _mm_loadu_si128((__m128i *)&elts[pos]);
51 : 0 : _mm_storeu_si128((__m128i *)&pkts[pos], mbp);
52 : : }
53 [ # # # # ]: 0 : if (n & 1)
54 : 0 : pkts[pos] = elts[pos];
55 : : }
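
The vectorized copy above moves two mbuf pointers per iteration: on the 64-bit targets this code runs on, two pointers fit in one __m128i, so a single unaligned load/store pair replaces two scalar assignments, and a trailing odd element is copied separately. A minimal scalar sketch of the same semantics, relying only on the headers already included above (the helper name is illustrative, not part of the driver):

static inline void
rxq_copy_mbuf_scalar(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t i;

	/* Copy mbuf pointers one by one from the SW ring to the output array. */
	for (i = 0; i < n; ++i)
		pkts[i] = elts[i];
}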
56 : :
57 : : /**
58 : : * Decompress a compressed completion and fill in mbufs in RX SW ring with data
59 : : * extracted from the title completion descriptor.
60 : : *
61 : : * @param rxq
62 : : * Pointer to RX queue structure.
63 : : * @param cq
64 : : * Pointer to the completion array whose first entry is a compressed completion.
65 : : * @param elts
66 : : * Pointer to the SW ring to be filled. The first mbuf has to be pre-built from
67 : : * the title completion descriptor so it can be copied to the rest of the mbufs.
68 : : *
69 : : * @return
70 : : * Number of mini-CQEs successfully decompressed.
71 : : */
72 : : static inline uint16_t
73 : 0 : rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
74 : : struct rte_mbuf **elts)
75 : : {
76 [ # # ]: 0 : volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + !rxq->cqe_comp_layout);
77 : : /* Title packet is pre-built. */
78 [ # # ]: 0 : struct rte_mbuf *t_pkt = rxq->cqe_comp_layout ? &rxq->title_pkt : elts[0];
79 : : unsigned int pos;
80 : : unsigned int i;
81 : : unsigned int inv = 0;
82 : : /* Mask to shuffle from extracted mini CQE to mbuf. */
83 : : const __m128i shuf_mask1 =
84 : : _mm_set_epi8(0, 1, 2, 3, /* rss, bswap32 */
85 : : -1, -1, /* skip vlan_tci */
86 : : 6, 7, /* data_len, bswap16 */
87 : : -1, -1, 6, 7, /* pkt_len, bswap16 */
88 : : -1, -1, -1, -1 /* skip packet_type */);
89 : : const __m128i shuf_mask2 =
90 : : _mm_set_epi8(8, 9, 10, 11, /* rss, bswap32 */
91 : : -1, -1, /* skip vlan_tci */
92 : : 14, 15, /* data_len, bswap16 */
93 : : -1, -1, 14, 15, /* pkt_len, bswap16 */
94 : : -1, -1, -1, -1 /* skip packet_type */);
95 : : /* Restore the compressed count. Must be 16 bits. */
96 [ # # ]: 0 : uint16_t mcqe_n = (rxq->cqe_comp_layout) ?
97 : 0 : (MLX5_CQE_NUM_MINIS(cq->op_own) + 1) :
98 : 0 : t_pkt->data_len + (rxq->crc_present * RTE_ETHER_CRC_LEN);
99 : : uint16_t pkts_n = mcqe_n;
100 : : const __m128i rearm =
101 : : _mm_loadu_si128((__m128i *)&t_pkt->rearm_data);
102 : : const __m128i rxdf =
103 : : _mm_loadu_si128((__m128i *)&t_pkt->rx_descriptor_fields1);
104 : : const __m128i crc_adj =
105 : 0 : _mm_set_epi16(0, 0, 0,
106 : : rxq->crc_present * RTE_ETHER_CRC_LEN,
107 : : 0,
108 : 0 : rxq->crc_present * RTE_ETHER_CRC_LEN,
109 : : 0, 0);
110 : : __m128i ol_flags = _mm_setzero_si128();
111 : : __m128i ol_flags_mask = _mm_setzero_si128();
112 : : #ifdef MLX5_PMD_SOFT_COUNTERS
113 : : const __m128i zero = _mm_setzero_si128();
114 : : const __m128i ones = _mm_cmpeq_epi32(zero, zero);
115 : : uint32_t rcvd_byte = 0;
116 : : /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
117 : : const __m128i len_shuf_mask =
118 : : _mm_set_epi8(-1, -1, -1, -1,
119 : : -1, -1, -1, -1,
120 : : 14, 15, 6, 7,
121 : : 10, 11, 2, 3);
122 : : #endif
123 : : /*
124 : : * A. load mCQEs into a 128bit register.
125 : : * B. store rearm data to mbuf.
126 : : * C. combine data from mCQEs with rx_descriptor_fields1.
127 : : * D. store rx_descriptor_fields1.
128 : : * E. store flow tag (rte_flow mark).
129 : : */
130 : 0 : cycle:
131 [ # # ]: 0 : if (rxq->cqe_comp_layout)
132 : 0 : rte_prefetch0((void *)(cq + mcqe_n));
133 [ # # ]: 0 : for (pos = 0; pos < mcqe_n; ) {
134 : : __m128i mcqe1, mcqe2;
135 : : __m128i rxdf1, rxdf2;
136 : : #ifdef MLX5_PMD_SOFT_COUNTERS
137 : : __m128i byte_cnt, invalid_mask;
138 : : #endif
139 : :
140 [ # # ]: 0 : if (!rxq->cqe_comp_layout)
141 [ # # ]: 0 : for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
142 [ # # ]: 0 : if (likely(pos + i < mcqe_n))
143 : 0 : rte_prefetch0((void *)(cq + pos + i));
144 : : /* A.1 load mCQEs into a 128bit register. */
145 [ # # ]: 0 : mcqe1 = _mm_loadu_si128((__m128i *)&mcq[pos % 8]);
146 : 0 : mcqe2 = _mm_loadu_si128((__m128i *)&mcq[pos % 8 + 2]);
147 : : /* B.1 store rearm data to mbuf. */
148 [ # # ]: 0 : _mm_storeu_si128((__m128i *)&elts[pos]->rearm_data, rearm);
149 [ # # ]: 0 : _mm_storeu_si128((__m128i *)&elts[pos + 1]->rearm_data, rearm);
150 : : /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
151 : : rxdf1 = _mm_shuffle_epi8(mcqe1, shuf_mask1);
152 : : rxdf2 = _mm_shuffle_epi8(mcqe1, shuf_mask2);
153 : : rxdf1 = _mm_sub_epi16(rxdf1, crc_adj);
154 : : rxdf2 = _mm_sub_epi16(rxdf2, crc_adj);
155 : : rxdf1 = _mm_blend_epi16(rxdf1, rxdf, 0x23);
156 : : rxdf2 = _mm_blend_epi16(rxdf2, rxdf, 0x23);
157 : : /* D.1 store rx_descriptor_fields1. */
158 : : _mm_storeu_si128((__m128i *)
159 : 0 : &elts[pos]->rx_descriptor_fields1,
160 : : rxdf1);
161 : : _mm_storeu_si128((__m128i *)
162 : 0 : &elts[pos + 1]->rx_descriptor_fields1,
163 : : rxdf2);
164 : : /* B.1 store rearm data to mbuf. */
165 : 0 : _mm_storeu_si128((__m128i *)&elts[pos + 2]->rearm_data, rearm);
166 : 0 : _mm_storeu_si128((__m128i *)&elts[pos + 3]->rearm_data, rearm);
167 : : /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
168 : : rxdf1 = _mm_shuffle_epi8(mcqe2, shuf_mask1);
169 : : rxdf2 = _mm_shuffle_epi8(mcqe2, shuf_mask2);
170 : : rxdf1 = _mm_sub_epi16(rxdf1, crc_adj);
171 : : rxdf2 = _mm_sub_epi16(rxdf2, crc_adj);
172 : : rxdf1 = _mm_blend_epi16(rxdf1, rxdf, 0x23);
173 : : rxdf2 = _mm_blend_epi16(rxdf2, rxdf, 0x23);
174 : : /* D.1 store rx_descriptor_fields1. */
175 : : _mm_storeu_si128((__m128i *)
176 : 0 : &elts[pos + 2]->rx_descriptor_fields1,
177 : : rxdf1);
178 : : _mm_storeu_si128((__m128i *)
179 : 0 : &elts[pos + 3]->rx_descriptor_fields1,
180 : : rxdf2);
181 : : #ifdef MLX5_PMD_SOFT_COUNTERS
182 : 0 : invalid_mask = _mm_set_epi64x(0,
183 : 0 : (mcqe_n - pos) *
184 [ # # ]: 0 : sizeof(uint16_t) * 8);
185 : : invalid_mask = _mm_sll_epi64(ones, invalid_mask);
186 : : byte_cnt = _mm_blend_epi16(_mm_srli_si128(mcqe1, 4),
187 : : mcqe2, 0xcc);
188 : : byte_cnt = _mm_shuffle_epi8(byte_cnt, len_shuf_mask);
189 : : byte_cnt = _mm_andnot_si128(invalid_mask, byte_cnt);
190 : : byte_cnt = _mm_hadd_epi16(byte_cnt, zero);
191 : 0 : rcvd_byte += _mm_cvtsi128_si64(_mm_hadd_epi16(byte_cnt, zero));
192 : : #endif
193 [ # # ]: 0 : if (rxq->mark) {
194 [ # # ]: 0 : if (rxq->mcqe_format !=
195 : : MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) {
196 : 0 : const uint32_t flow_tag = t_pkt->hash.fdir.hi;
197 : :
198 : : /* E.1 store flow tag (rte_flow mark). */
199 : 0 : elts[pos]->hash.fdir.hi = flow_tag;
200 : 0 : elts[pos + 1]->hash.fdir.hi = flow_tag;
201 : 0 : elts[pos + 2]->hash.fdir.hi = flow_tag;
202 : 0 : elts[pos + 3]->hash.fdir.hi = flow_tag;
203 : : } else {
204 : : const __m128i flow_mark_adj =
205 : : _mm_set_epi32(-1, -1, -1, -1);
206 : : const __m128i flow_mark_shuf =
207 : : _mm_set_epi8(-1, 9, 8, 12,
208 : : -1, 1, 0, 4,
209 : : -1, -1, -1, -1,
210 : : -1, -1, -1, -1);
211 : : const __m128i ft_mask =
212 : : _mm_set1_epi32(0xffffff00);
213 : : const __m128i fdir_flags =
214 : : _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
215 : : const __m128i fdir_all_flags =
216 : 0 : _mm_set1_epi32(RTE_MBUF_F_RX_FDIR |
217 : 0 : rxq->mark_flag);
218 : : __m128i fdir_id_flags =
219 : 0 : _mm_set1_epi32(rxq->mark_flag);
220 : :
221 : : /* Extract flow_tag field. */
222 : : __m128i ftag0 =
223 : : _mm_shuffle_epi8(mcqe1, flow_mark_shuf);
224 : : __m128i ftag1 =
225 : : _mm_shuffle_epi8(mcqe2, flow_mark_shuf);
226 : : __m128i ftag =
227 : : _mm_unpackhi_epi64(ftag0, ftag1);
228 : : __m128i invalid_mask =
229 : : _mm_cmpeq_epi32(ftag, zero);
230 : :
231 : : ol_flags_mask = _mm_or_si128(ol_flags_mask,
232 : : fdir_all_flags);
233 : : /* Set RTE_MBUF_F_RX_FDIR if flow tag is non-zero. */
234 : : ol_flags = _mm_or_si128(ol_flags,
235 : : _mm_andnot_si128(invalid_mask,
236 : : fdir_flags));
237 : : /* Mask out invalid entries. */
238 : : fdir_id_flags = _mm_andnot_si128(invalid_mask,
239 : : fdir_id_flags);
240 : : /* Check if flow tag is MLX5_FLOW_MARK_DEFAULT. */
241 : : ol_flags = _mm_or_si128(ol_flags,
242 : : _mm_andnot_si128(_mm_cmpeq_epi32(ftag,
243 : : ft_mask),
244 : : fdir_id_flags));
245 : : ftag = _mm_add_epi32(ftag, flow_mark_adj);
246 : 0 : elts[pos]->hash.fdir.hi =
247 : 0 : _mm_extract_epi32(ftag, 0);
248 : 0 : elts[pos + 1]->hash.fdir.hi =
249 : 0 : _mm_extract_epi32(ftag, 1);
250 : 0 : elts[pos + 2]->hash.fdir.hi =
251 : 0 : _mm_extract_epi32(ftag, 2);
252 : 0 : elts[pos + 3]->hash.fdir.hi =
253 : 0 : _mm_extract_epi32(ftag, 3);
254 : : }
255 : : }
256 [ # # ]: 0 : if (unlikely(rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)) {
257 [ # # ]: 0 : if (rxq->mcqe_format ==
258 : : MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
259 : : const uint8_t pkt_info =
260 [ # # ]: 0 : (cq->pkt_info & 0x3) << 6;
261 : : const uint8_t pkt_hdr0 =
262 : : _mm_extract_epi8(mcqe1, 0);
263 : : const uint8_t pkt_hdr1 =
264 : : _mm_extract_epi8(mcqe1, 8);
265 : : const uint8_t pkt_hdr2 =
266 : : _mm_extract_epi8(mcqe2, 0);
267 : : const uint8_t pkt_hdr3 =
268 : : _mm_extract_epi8(mcqe2, 8);
269 : : const __m128i vlan_mask =
270 : : _mm_set1_epi32(RTE_MBUF_F_RX_VLAN |
271 : : RTE_MBUF_F_RX_VLAN_STRIPPED);
272 : : const __m128i cv_mask =
273 : : _mm_set1_epi32(MLX5_CQE_VLAN_STRIPPED);
274 : : const __m128i pkt_cv =
275 [ # # ]: 0 : _mm_set_epi32(pkt_hdr0 & 0x1,
276 : : pkt_hdr1 & 0x1,
277 : : pkt_hdr2 & 0x1,
278 : : pkt_hdr3 & 0x1);
279 : :
280 : : ol_flags_mask = _mm_or_si128(ol_flags_mask,
281 : : vlan_mask);
282 : : ol_flags = _mm_or_si128(ol_flags,
283 : : _mm_and_si128(_mm_cmpeq_epi32(pkt_cv,
284 : : cv_mask), vlan_mask));
285 : 0 : elts[pos]->packet_type =
286 : 0 : mlx5_ptype_table[(pkt_hdr0 >> 2) |
287 : : pkt_info];
288 : 0 : elts[pos + 1]->packet_type =
289 : 0 : mlx5_ptype_table[(pkt_hdr1 >> 2) |
290 : : pkt_info];
291 : 0 : elts[pos + 2]->packet_type =
292 : 0 : mlx5_ptype_table[(pkt_hdr2 >> 2) |
293 : : pkt_info];
294 : 0 : elts[pos + 3]->packet_type =
295 : 0 : mlx5_ptype_table[(pkt_hdr3 >> 2) |
296 : : pkt_info];
297 [ # # ]: 0 : if (rxq->tunnel) {
298 : 0 : elts[pos]->packet_type |=
299 : 0 : !!(((pkt_hdr0 >> 2) |
300 : : pkt_info) & (1 << 6));
301 : 0 : elts[pos + 1]->packet_type |=
302 : 0 : !!(((pkt_hdr1 >> 2) |
303 : : pkt_info) & (1 << 6));
304 : 0 : elts[pos + 2]->packet_type |=
305 : 0 : !!(((pkt_hdr2 >> 2) |
306 : : pkt_info) & (1 << 6));
307 : 0 : elts[pos + 3]->packet_type |=
308 : 0 : !!(((pkt_hdr3 >> 2) |
309 : : pkt_info) & (1 << 6));
310 : : }
311 : : }
312 : : const __m128i hash_flags =
313 : : _mm_set1_epi32(RTE_MBUF_F_RX_RSS_HASH);
314 : : const __m128i rearm_flags =
315 : 0 : _mm_set1_epi32((uint32_t)t_pkt->ol_flags);
316 : :
317 : : ol_flags_mask = _mm_or_si128(ol_flags_mask, hash_flags);
318 : : ol_flags = _mm_or_si128(ol_flags,
319 : : _mm_andnot_si128(ol_flags_mask, rearm_flags));
320 : 0 : elts[pos]->ol_flags =
321 : 0 : _mm_extract_epi32(ol_flags, 0);
322 : 0 : elts[pos + 1]->ol_flags =
323 : 0 : _mm_extract_epi32(ol_flags, 1);
324 : 0 : elts[pos + 2]->ol_flags =
325 : 0 : _mm_extract_epi32(ol_flags, 2);
326 : 0 : elts[pos + 3]->ol_flags =
327 : 0 : _mm_extract_epi32(ol_flags, 3);
328 : 0 : elts[pos]->hash.rss = 0;
329 : 0 : elts[pos + 1]->hash.rss = 0;
330 : 0 : elts[pos + 2]->hash.rss = 0;
331 : 0 : elts[pos + 3]->hash.rss = 0;
332 : : }
333 [ # # ]: 0 : if (rxq->dynf_meta) {
334 : 0 : int32_t offs = rxq->flow_meta_offset;
335 : 0 : const uint32_t meta =
336 : 0 : *RTE_MBUF_DYNFIELD(t_pkt, offs, uint32_t *);
337 : :
338 : : /* Check if title packet has valid metadata. */
339 [ # # ]: 0 : if (meta) {
340 : : MLX5_ASSERT(t_pkt->ol_flags &
341 : : rxq->flow_meta_mask);
342 : 0 : *RTE_MBUF_DYNFIELD(elts[pos], offs,
343 : 0 : uint32_t *) = meta;
344 : 0 : *RTE_MBUF_DYNFIELD(elts[pos + 1], offs,
345 : 0 : uint32_t *) = meta;
346 : 0 : *RTE_MBUF_DYNFIELD(elts[pos + 2], offs,
347 : 0 : uint32_t *) = meta;
348 : 0 : *RTE_MBUF_DYNFIELD(elts[pos + 3], offs,
349 : 0 : uint32_t *) = meta;
350 : : }
351 : : }
352 : 0 : pos += MLX5_VPMD_DESCS_PER_LOOP;
353 : : /* Move to next CQE and invalidate consumed CQEs. */
354 [ # # ]: 0 : if (!rxq->cqe_comp_layout) {
355 [ # # # # ]: 0 : if (!(pos & 0x7) && pos < mcqe_n) {
356 [ # # ]: 0 : if (pos + 8 < mcqe_n)
357 : 0 : rte_prefetch0((void *)(cq + pos + 8));
358 : 0 : mcq = (void *)(cq + pos);
359 [ # # ]: 0 : for (i = 0; i < 8; ++i)
360 : 0 : cq[inv++].op_own = MLX5_CQE_INVALIDATE;
361 : : }
362 : : }
363 : : }
364 [ # # ]: 0 : if (rxq->cqe_comp_layout) {
365 : : int ret;
366 : : /* Keep unzipping if the next CQE is the miniCQE array. */
367 : 0 : cq = &cq[mcqe_n];
368 [ # # ]: 0 : ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci + pkts_n);
369 : 0 : if (ret == MLX5_CQE_STATUS_SW_OWN &&
370 [ # # ]: 0 : MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED) {
371 : : pos = 0;
372 : 0 : elts = &elts[mcqe_n];
373 : : mcq = (void *)cq;
374 : 0 : mcqe_n = MLX5_CQE_NUM_MINIS(cq->op_own) + 1;
375 : 0 : pkts_n += mcqe_n;
376 : 0 : goto cycle;
377 : : }
378 : : } else {
379 : : /* Invalidate the rest of the CQEs. */
380 [ # # ]: 0 : for (; inv < pkts_n; ++inv)
381 : 0 : cq[inv].op_own = MLX5_CQE_INVALIDATE;
382 : : }
383 : : #ifdef MLX5_PMD_SOFT_COUNTERS
384 : 0 : rxq->stats.ipackets += pkts_n;
385 : 0 : rxq->stats.ibytes += rcvd_byte;
386 : : #endif
387 : 0 : return pkts_n;
388 : : }
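
Both this routine (in the soft-counters path above) and rxq_cq_process_v() below build lane masks by shifting an all-ones register: _mm_sll_epi64(ones, bits) with bits = valid_entries * 16 sets every 16-bit lane at or beyond the valid entry count, and those lanes are then cleared with _mm_andnot_si128() or merged into the invalid mask. A scalar sketch of the mask construction, assuming the SSE shift behaviour that counts of 64 or more yield zero (the helper name is illustrative only):

/* Return a 64-bit mask whose set 16-bit lanes mark entries at or beyond
 * the valid count. */
static inline uint64_t
lanes_beyond_count(unsigned int valid_entries)
{
	unsigned int bits = valid_entries * 16;

	/* _mm_sll_epi64() returns zero for shift counts >= 64; a plain C
	 * shift by 64 or more would be undefined, so handle it explicitly. */
	if (bits >= 64)
		return 0;
	return ~UINT64_C(0) << bits;
}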
389 : :
390 : : /**
391 : : * Calculate the packet type and offload flags for the mbufs and store them.
392 : : *
393 : : * @param rxq
394 : : * Pointer to RX queue structure.
395 : : * @param cqes[4]
396 : : * Array of four 16-byte completions extracted from the original completion
397 : : * descriptor.
398 : : * @param op_err
399 : : * Opcode vector having responder error status. Each field is 4B.
400 : : * @param pkts
401 : : * Pointer to array of packets to be filled.
402 : : */
403 : : static inline void
404 : 0 : rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
405 : : __m128i op_err, struct rte_mbuf **pkts)
406 : : {
407 : : __m128i pinfo0, pinfo1;
408 : : __m128i pinfo, ptype;
409 : 0 : __m128i ol_flags = _mm_set1_epi32(rxq->rss_hash * RTE_MBUF_F_RX_RSS_HASH |
410 [ # # ]: 0 : rxq->hw_timestamp * rxq->timestamp_rx_flag);
411 : : __m128i cv_flags;
412 : : const __m128i zero = _mm_setzero_si128();
413 : : const __m128i ptype_mask = _mm_set1_epi32(0xfd06);
414 : : const __m128i ptype_ol_mask = _mm_set1_epi32(0x106);
415 : : const __m128i pinfo_mask = _mm_set1_epi32(0x3);
416 : : const __m128i cv_flag_sel =
417 : : _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0,
418 : : (uint8_t)((RTE_MBUF_F_RX_IP_CKSUM_GOOD |
419 : : RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1),
420 : : 0,
421 : : (uint8_t)(RTE_MBUF_F_RX_L4_CKSUM_GOOD >> 1),
422 : : 0,
423 : : (uint8_t)(RTE_MBUF_F_RX_IP_CKSUM_GOOD >> 1),
424 : : (uint8_t)(RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED),
425 : : 0);
426 : : const __m128i cv_mask =
427 : : _mm_set1_epi32(RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD |
428 : : RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED);
429 : : const __m128i mbuf_init =
430 : : _mm_load_si128((__m128i *)&rxq->mbuf_initializer);
431 : : __m128i rearm0, rearm1, rearm2, rearm3;
432 : : uint8_t pt_idx0, pt_idx1, pt_idx2, pt_idx3;
433 : :
434 : : /* Extract pkt_info field. */
435 [ # # ]: 0 : pinfo0 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
436 [ # # ]: 0 : pinfo1 = _mm_unpacklo_epi32(cqes[2], cqes[3]);
437 : : pinfo = _mm_unpacklo_epi64(pinfo0, pinfo1);
438 : : /* Extract hdr_type_etc field. */
439 : : pinfo0 = _mm_unpackhi_epi32(cqes[0], cqes[1]);
440 : : pinfo1 = _mm_unpackhi_epi32(cqes[2], cqes[3]);
441 : : ptype = _mm_unpacklo_epi64(pinfo0, pinfo1);
442 [ # # ]: 0 : if (rxq->mark) {
443 : : const __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);
444 : : const __m128i fdir_flags = _mm_set1_epi32(RTE_MBUF_F_RX_FDIR);
445 : 0 : __m128i fdir_id_flags = _mm_set1_epi32(rxq->mark_flag);
446 : : __m128i flow_tag, invalid_mask;
447 : :
448 : : flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask);
449 : : /* If the flow tag is non-zero, set RTE_MBUF_F_RX_FDIR. */
450 : : invalid_mask = _mm_cmpeq_epi32(flow_tag, zero);
451 : : ol_flags = _mm_or_si128(ol_flags,
452 : : _mm_andnot_si128(invalid_mask,
453 : : fdir_flags));
454 : : /* Mask out invalid entries. */
455 : : fdir_id_flags = _mm_andnot_si128(invalid_mask, fdir_id_flags);
456 : : /* Check if flow tag is MLX5_FLOW_MARK_DEFAULT. */
457 : : ol_flags = _mm_or_si128(ol_flags,
458 : : _mm_andnot_si128(
459 : : _mm_cmpeq_epi32(flow_tag,
460 : : pinfo_ft_mask),
461 : : fdir_id_flags));
462 : : }
463 : : /*
464 : : * Merge the two fields to generate the following:
465 : : * bit[1] = l3_ok
466 : : * bit[2] = l4_ok
467 : : * bit[8] = cv
468 : : * bit[11:10] = l3_hdr_type
469 : : * bit[14:12] = l4_hdr_type
470 : : * bit[15] = ip_frag
471 : : * bit[16] = tunneled
472 : : * bit[17] = outer_l3_type
473 : : */
474 : : ptype = _mm_and_si128(ptype, ptype_mask);
475 : : pinfo = _mm_and_si128(pinfo, pinfo_mask);
476 : : pinfo = _mm_slli_epi32(pinfo, 16);
477 : : /* Make pinfo hold the merged fields for the ol_flags calculation. */
478 : : pinfo = _mm_or_si128(ptype, pinfo);
479 : : ptype = _mm_srli_epi32(pinfo, 10);
480 : : ptype = _mm_packs_epi32(ptype, zero);
481 : : /* Errored packets will have RTE_PTYPE_ALL_MASK. */
482 : : op_err = _mm_srli_epi16(op_err, 8);
483 : : ptype = _mm_or_si128(ptype, op_err);
484 : : pt_idx0 = _mm_extract_epi8(ptype, 0);
485 : : pt_idx1 = _mm_extract_epi8(ptype, 2);
486 : : pt_idx2 = _mm_extract_epi8(ptype, 4);
487 : : pt_idx3 = _mm_extract_epi8(ptype, 6);
488 : 0 : pkts[0]->packet_type = mlx5_ptype_table[pt_idx0] |
489 : 0 : !!(pt_idx0 & (1 << 6)) * rxq->tunnel;
490 : 0 : pkts[1]->packet_type = mlx5_ptype_table[pt_idx1] |
491 : 0 : !!(pt_idx1 & (1 << 6)) * rxq->tunnel;
492 : 0 : pkts[2]->packet_type = mlx5_ptype_table[pt_idx2] |
493 : 0 : !!(pt_idx2 & (1 << 6)) * rxq->tunnel;
494 : 0 : pkts[3]->packet_type = mlx5_ptype_table[pt_idx3] |
495 : 0 : !!(pt_idx3 & (1 << 6)) * rxq->tunnel;
496 : : /* Fill flags for checksum and VLAN. */
497 : : pinfo = _mm_and_si128(pinfo, ptype_ol_mask);
498 : : pinfo = _mm_shuffle_epi8(cv_flag_sel, pinfo);
499 : : /* Locate checksum flags at byte[2:1] and merge with VLAN flags. */
500 : : cv_flags = _mm_slli_epi32(pinfo, 9);
501 : : cv_flags = _mm_or_si128(pinfo, cv_flags);
502 : : /* Move back flags to start from byte[0]. */
503 : : cv_flags = _mm_srli_epi32(cv_flags, 8);
504 : : /* Mask out garbage bits. */
505 : : cv_flags = _mm_and_si128(cv_flags, cv_mask);
506 : : /* Merge to ol_flags. */
507 : : ol_flags = _mm_or_si128(ol_flags, cv_flags);
508 : : /* Merge mbuf_init and ol_flags. */
509 : : rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 8), 0x30);
510 : : rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 4), 0x30);
511 : : rearm2 = _mm_blend_epi16(mbuf_init, ol_flags, 0x30);
512 : : rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(ol_flags, 4), 0x30);
513 : : /* Write 8B rearm_data and 8B ol_flags. */
514 : : _mm_store_si128((__m128i *)&pkts[0]->rearm_data, rearm0);
515 : 0 : _mm_store_si128((__m128i *)&pkts[1]->rearm_data, rearm1);
516 : 0 : _mm_store_si128((__m128i *)&pkts[2]->rearm_data, rearm2);
517 : 0 : _mm_store_si128((__m128i *)&pkts[3]->rearm_data, rearm3);
518 : 0 : }
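
The packet-type lookup above merges hdr_type_etc (masked with 0xfd06) with the two low bits of pkt_info shifted up to bits 16-17, then shifts the merged word right by 10 so that bits [17:10] form an 8-bit index into mlx5_ptype_table; bit 6 of that index is the tunneled flag tested against rxq->tunnel. A scalar sketch of the index computation, assuming host-endian field values (the helper is illustrative, not a driver API):

static inline uint8_t
cqe_to_ptype_index(uint16_t hdr_type_etc, uint8_t pkt_info)
{
	/* Keep l3_ok/l4_ok/cv plus the l3/l4 header type and ip_frag bits. */
	uint32_t merged = hdr_type_etc & 0xfd06;

	/* Add tunneled and outer_l3_type as bits 16-17. */
	merged |= (uint32_t)(pkt_info & 0x3) << 16;
	/* Bits [17:10] of the merged word are the table index;
	 * bit 6 of the result is the tunneled flag. */
	return (uint8_t)(merged >> 10);
}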
519 : :
520 : : /**
521 : : * Process non-compressed completions and fill in mbufs in the RX SW ring
522 : : * with data extracted from the completion descriptors.
523 : : *
524 : : * @param rxq
525 : : * Pointer to RX queue structure.
526 : : * @param cq
527 : : * Pointer to completion array having a non-compressed completion at first.
528 : : * @param elts
529 : : * Pointer to the SW ring holding the mbufs that will be filled from the
530 : : * completions and returned through the pkts array.
531 : : * @param[out] pkts
532 : : * Array to store received packets.
533 : : * @param pkts_n
534 : : * Maximum number of packets in array.
535 : : * @param[out] err
536 : : * Pointer to a flag. Set to a non-zero value if the pkts array has at least
537 : : * one error packet to handle.
538 : : * @param[out] comp
539 : : * Pointer to an index. Set to the index of the first compressed completion, if any.
540 : : *
541 : : * @return
542 : : * Number of CQEs successfully processed.
543 : : */
544 : : static inline uint16_t
545 : 0 : rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
546 : : struct rte_mbuf **elts, struct rte_mbuf **pkts,
547 : : uint16_t pkts_n, uint64_t *err, uint64_t *comp)
548 : : {
549 : 0 : const uint16_t q_n = 1 << rxq->cqe_n;
550 : 0 : const uint16_t q_mask = q_n - 1;
551 : : unsigned int pos, adj;
552 : : uint64_t n = 0;
553 : : uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
554 : : uint16_t nocmp_n = 0;
555 : 0 : const uint8_t vic = rxq->cq_ci >> rxq->cqe_n;
556 : 0 : const uint8_t own = !(rxq->cq_ci & (q_mask + 1));
557 : : const __m128i vic_check = _mm_set1_epi64x(0x00ff000000ff0000LL);
558 : : const __m128i owner_check = _mm_set1_epi64x(0x0100000001000000LL);
559 : : const __m128i opcode_check = _mm_set1_epi64x(0xf0000000f0000000LL);
560 : : const __m128i format_check = _mm_set1_epi64x(0x0c0000000c000000LL);
561 : : const __m128i resp_err_check = _mm_set1_epi64x(0xe0000000e0000000LL);
562 : : #ifdef MLX5_PMD_SOFT_COUNTERS
563 : : uint32_t rcvd_byte = 0;
564 : : /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
565 : : const __m128i len_shuf_mask =
566 : : _mm_set_epi8(-1, -1, -1, -1,
567 : : -1, -1, -1, -1,
568 : : 12, 13, 8, 9,
569 : : 4, 5, 0, 1);
570 : : #endif
571 : : const __m128i validity =
572 : 0 : _mm_set_epi8(0, vic, 0, 0,
573 : : 0, vic, 0, 0,
574 : : 0, vic, 0, 0,
575 : : 0, vic, 0, 0);
576 : : const __m128i ownership =
577 : 0 : _mm_set_epi8(own, 0, 0, 0,
578 : : own, 0, 0, 0,
579 : : own, 0, 0, 0,
580 : : own, 0, 0, 0);
581 : : /* Mask to shuffle from extracted CQE to mbuf. */
582 : : const __m128i shuf_mask =
583 : : _mm_set_epi8(-1, 3, 2, 1, /* fdir.hi */
584 : : 12, 13, 14, 15, /* rss, bswap32 */
585 : : 10, 11, /* vlan_tci, bswap16 */
586 : : 4, 5, /* data_len, bswap16 */
587 : : -1, -1, /* zero out 2nd half of pkt_len */
588 : : 4, 5 /* pkt_len, bswap16 */);
589 : : /* Mask to blend from the last Qword to the first DQword. */
590 : : const __m128i blend_mask =
591 : : _mm_set_epi8(-1, -1, -1, -1,
592 : : -1, -1, -1, -1,
593 : : 0, 0, 0, 0,
594 : : 0, 0, 0, -1);
595 : : const __m128i zero = _mm_setzero_si128();
596 : : const __m128i ones = _mm_cmpeq_epi32(zero, zero);
597 : : const __m128i crc_adj =
598 : 0 : _mm_set_epi16(0, 0, 0, 0, 0,
599 : : rxq->crc_present * RTE_ETHER_CRC_LEN,
600 : : 0,
601 : 0 : rxq->crc_present * RTE_ETHER_CRC_LEN);
602 : 0 : const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
603 : : /*
604 : : * A. load the first Qword (8 bytes) in one loop.
605 : : * B. copy 4 mbuf pointers from the elts ring to the returned pkts.
606 : : * C. load the remaining CQE data and extract the necessary fields.
607 : : * The final 16-byte cqes[] extracted from the original 64-byte CQE has the
608 : : * following structure:
609 : : * struct {
610 : : * uint8_t pkt_info;
611 : : * uint8_t flow_tag[3];
612 : : * uint16_t byte_cnt;
613 : : * uint8_t validity_iteration_count;
614 : : * uint8_t op_own;
615 : : * uint16_t hdr_type_etc;
616 : : * uint16_t vlan_info;
617 : : * uint32_t rx_has_res;
618 : : * } c;
619 : : * D. fill in mbuf.
620 : : * E. get valid CQEs.
621 : : * F. find compressed CQE.
622 : : */
623 : 0 : for (pos = 0;
624 [ # # ]: 0 : pos < pkts_n;
625 : 0 : pos += MLX5_VPMD_DESCS_PER_LOOP) {
626 : : __m128i cqes[MLX5_VPMD_DESCS_PER_LOOP];
627 : : __m128i cqe_tmp1, cqe_tmp2;
628 : : __m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
629 : : __m128i op_own, op_own_tmp1, op_own_tmp2;
630 : : __m128i opcode, owner_mask, invalid_mask;
631 : : __m128i comp_mask, mini_mask;
632 : : __m128i mask;
633 : : #ifdef MLX5_PMD_SOFT_COUNTERS
634 : : __m128i byte_cnt;
635 : : #endif
636 : : __m128i mbp1, mbp2;
637 : : __m128i p = _mm_set_epi16(0, 0, 0, 0, 3, 2, 1, 0);
638 : : unsigned int p1, p2, p3;
639 : :
640 : : /* Prefetch next 4 CQEs. */
641 [ # # ]: 0 : if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
642 : 0 : rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP]);
643 : 0 : rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 1]);
644 : 0 : rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 2]);
645 : 0 : rte_prefetch0(&cq[pos + MLX5_VPMD_DESCS_PER_LOOP + 3]);
646 : : }
647 : : /* A.0 do not cross the end of CQ. */
648 : 0 : mask = _mm_set_epi64x(0, (pkts_n - pos) * sizeof(uint16_t) * 8);
649 : : mask = _mm_sll_epi64(ones, mask);
650 : : p = _mm_andnot_si128(mask, p);
651 : : /* A.1 load cqes. */
652 : 0 : p3 = _mm_extract_epi16(p, 3);
653 : 0 : cqes[3] = _mm_loadl_epi64((__m128i *)
654 : 0 : &cq[pos + p3].sop_drop_qpn);
655 : 0 : rte_compiler_barrier();
656 : 0 : p2 = _mm_extract_epi16(p, 2);
657 : 0 : cqes[2] = _mm_loadl_epi64((__m128i *)
658 : 0 : &cq[pos + p2].sop_drop_qpn);
659 : 0 : rte_compiler_barrier();
660 : : /* B.1 load mbuf pointers. */
661 : 0 : mbp1 = _mm_loadu_si128((__m128i *)&elts[pos]);
662 : 0 : mbp2 = _mm_loadu_si128((__m128i *)&elts[pos + 2]);
663 : : /* A.1 load a block having op_own. */
664 : 0 : p1 = _mm_extract_epi16(p, 1);
665 : 0 : cqes[1] = _mm_loadl_epi64((__m128i *)
666 : 0 : &cq[pos + p1].sop_drop_qpn);
667 : 0 : rte_compiler_barrier();
668 : 0 : cqes[0] = _mm_loadl_epi64((__m128i *)
669 : 0 : &cq[pos].sop_drop_qpn);
670 : : /* B.2 copy mbuf pointers. */
671 : 0 : _mm_storeu_si128((__m128i *)&pkts[pos], mbp1);
672 : 0 : _mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2);
673 : 0 : rte_io_rmb();
674 : : /* C.1 load the remaining CQE data and extract the necessary fields. */
675 : : cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]);
676 : : cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]);
677 [ # # ]: 0 : cqes[3] = _mm_blendv_epi8(cqes[3], cqe_tmp2, blend_mask);
678 [ # # ]: 0 : cqes[2] = _mm_blendv_epi8(cqes[2], cqe_tmp1, blend_mask);
679 : : cqe_tmp2 = _mm_loadu_si128((__m128i *)&cq[pos + p3].csum);
680 : : cqe_tmp1 = _mm_loadu_si128((__m128i *)&cq[pos + p2].csum);
681 : 0 : cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x30);
682 [ # # ]: 0 : cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x30);
683 : : cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p3].rsvd4[2]);
684 : : cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos + p2].rsvd4[2]);
685 : 0 : cqes[3] = _mm_blend_epi16(cqes[3], cqe_tmp2, 0x04);
686 [ # # ]: 0 : cqes[2] = _mm_blend_epi16(cqes[2], cqe_tmp1, 0x04);
687 : : /* C.2 generate the final structure for mbuf with byte swapping. */
688 : : pkt_mb3 = _mm_shuffle_epi8(cqes[3], shuf_mask);
689 : : pkt_mb2 = _mm_shuffle_epi8(cqes[2], shuf_mask);
690 : : /* C.3 adjust CRC length. */
691 : : pkt_mb3 = _mm_sub_epi16(pkt_mb3, crc_adj);
692 : : pkt_mb2 = _mm_sub_epi16(pkt_mb2, crc_adj);
693 : : /* C.4 adjust flow mark. */
694 : : pkt_mb3 = _mm_add_epi32(pkt_mb3, flow_mark_adj);
695 : : pkt_mb2 = _mm_add_epi32(pkt_mb2, flow_mark_adj);
696 : : /* D.1 fill in mbuf - rx_descriptor_fields1. */
697 [ # # ]: 0 : _mm_storeu_si128((void *)&pkts[pos + 3]->pkt_len, pkt_mb3);
698 : 0 : _mm_storeu_si128((void *)&pkts[pos + 2]->pkt_len, pkt_mb2);
699 : : /* E.1 extract op_own field. */
700 [ # # ]: 0 : op_own_tmp2 = _mm_unpacklo_epi32(cqes[2], cqes[3]);
701 : : /* C.1 load the remaining CQE data and extract the necessary fields. */
702 : : cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p1]);
703 : : cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos]);
704 : 0 : cqes[1] = _mm_blendv_epi8(cqes[1], cqe_tmp2, blend_mask);
705 : 0 : cqes[0] = _mm_blendv_epi8(cqes[0], cqe_tmp1, blend_mask);
706 : : cqe_tmp2 = _mm_loadu_si128((__m128i *)&cq[pos + p1].csum);
707 : : cqe_tmp1 = _mm_loadu_si128((__m128i *)&cq[pos].csum);
708 : : cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x30);
709 : : cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x30);
710 : : cqe_tmp2 = _mm_loadl_epi64((__m128i *)&cq[pos + p1].rsvd4[2]);
711 : : cqe_tmp1 = _mm_loadl_epi64((__m128i *)&cq[pos].rsvd4[2]);
712 : 0 : cqes[1] = _mm_blend_epi16(cqes[1], cqe_tmp2, 0x04);
713 [ # # ]: 0 : cqes[0] = _mm_blend_epi16(cqes[0], cqe_tmp1, 0x04);
714 : : /* C.2 generate the final structure for mbuf with byte swapping. */
715 : : pkt_mb1 = _mm_shuffle_epi8(cqes[1], shuf_mask);
716 : : pkt_mb0 = _mm_shuffle_epi8(cqes[0], shuf_mask);
717 : : /* C.3 adjust CRC length. */
718 : : pkt_mb1 = _mm_sub_epi16(pkt_mb1, crc_adj);
719 : : pkt_mb0 = _mm_sub_epi16(pkt_mb0, crc_adj);
720 : : /* C.4 adjust flow mark. */
721 : : pkt_mb1 = _mm_add_epi32(pkt_mb1, flow_mark_adj);
722 : : pkt_mb0 = _mm_add_epi32(pkt_mb0, flow_mark_adj);
723 : : /* E.1 extract op_own byte. */
724 : : op_own_tmp1 = _mm_unpacklo_epi32(cqes[0], cqes[1]);
725 : : op_own = _mm_unpackhi_epi64(op_own_tmp1, op_own_tmp2);
726 : : /* D.1 fill in mbuf - rx_descriptor_fields1. */
727 : 0 : _mm_storeu_si128((void *)&pkts[pos + 1]->pkt_len, pkt_mb1);
728 : 0 : _mm_storeu_si128((void *)&pkts[pos]->pkt_len, pkt_mb0);
729 : : /* E.2 mask out CQEs belonging to HW. */
730 [ # # ]: 0 : if (rxq->cqe_comp_layout) {
731 : : owner_mask = _mm_and_si128(op_own, vic_check);
732 : : owner_mask = _mm_cmpeq_epi32(owner_mask, validity);
733 : : owner_mask = _mm_xor_si128(owner_mask, ones);
734 : : } else {
735 : : owner_mask = _mm_and_si128(op_own, owner_check);
736 : : owner_mask = _mm_cmpeq_epi32(owner_mask, ownership);
737 : : }
738 : : owner_mask = _mm_packs_epi32(owner_mask, zero);
739 : : /* E.3 get mask for invalidated CQEs. */
740 : : opcode = _mm_and_si128(op_own, opcode_check);
741 : : invalid_mask = _mm_cmpeq_epi32(opcode_check, opcode);
742 : : invalid_mask = _mm_packs_epi32(invalid_mask, zero);
743 : : /* E.4 mask out beyond boundary. */
744 : : invalid_mask = _mm_or_si128(invalid_mask, mask);
745 : : /* E.5 merge invalid_mask with invalid owner. */
746 : : invalid_mask = _mm_or_si128(invalid_mask, owner_mask);
747 : : /* F.1 find compressed CQE format. */
748 : : comp_mask = _mm_and_si128(op_own, format_check);
749 : : comp_mask = _mm_cmpeq_epi32(comp_mask, format_check);
750 : : comp_mask = _mm_packs_epi32(comp_mask, zero);
751 : : /* F.2 mask out invalid entries. */
752 : : comp_mask = _mm_andnot_si128(invalid_mask, comp_mask);
753 : 0 : comp_idx = _mm_cvtsi128_si64(comp_mask);
754 : : /* F.3 get the first compressed CQE. */
755 : : comp_idx = comp_idx ?
756 : 0 : rte_ctz64(comp_idx) /
757 [ # # ]: 0 : (sizeof(uint16_t) * 8) :
758 : : MLX5_VPMD_DESCS_PER_LOOP;
759 : : /* E.6 mask out entries after the compressed CQE. */
760 [ # # ]: 0 : mask = _mm_set_epi64x(0, comp_idx * sizeof(uint16_t) * 8);
761 : : mask = _mm_sll_epi64(ones, mask);
762 : : invalid_mask = _mm_or_si128(invalid_mask, mask);
763 : : /* E.7 count non-compressed valid CQEs. */
764 : 0 : n = _mm_cvtsi128_si64(invalid_mask);
765 [ # # ]: 0 : n = n ? rte_ctz64(n) / (sizeof(uint16_t) * 8) :
766 : : MLX5_VPMD_DESCS_PER_LOOP;
767 : 0 : nocmp_n += n;
768 : : /* D.2 get the final invalid mask. */
769 [ # # ]: 0 : mask = _mm_set_epi64x(0, n * sizeof(uint16_t) * 8);
770 : : mask = _mm_sll_epi64(ones, mask);
771 : : invalid_mask = _mm_or_si128(invalid_mask, mask);
772 : : /* D.3 check error in opcode. */
773 : : adj = (!rxq->cqe_comp_layout &&
774 [ # # # # ]: 0 : comp_idx != MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n);
775 : 0 : mask = _mm_set_epi64x(0, adj * sizeof(uint16_t) * 8);
776 : : mini_mask = _mm_sll_epi64(invalid_mask, mask);
777 : : opcode = _mm_cmpeq_epi32(resp_err_check, opcode);
778 : : opcode = _mm_packs_epi32(opcode, zero);
779 : : opcode = _mm_andnot_si128(mini_mask, opcode);
780 : : /* D.4 mark if any error is set */
781 : 0 : *err |= _mm_cvtsi128_si64(opcode);
782 : : /* D.5 fill in mbuf - rearm_data and packet_type. */
783 : 0 : rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
784 [ # # ]: 0 : if (unlikely(rxq->shared)) {
785 : 0 : pkts[pos]->port = cq[pos].user_index_low;
786 : 0 : pkts[pos + p1]->port = cq[pos + p1].user_index_low;
787 : 0 : pkts[pos + p2]->port = cq[pos + p2].user_index_low;
788 : 0 : pkts[pos + p3]->port = cq[pos + p3].user_index_low;
789 : : }
790 [ # # ]: 0 : if (unlikely(rxq->hw_timestamp)) {
791 : 0 : int offset = rxq->timestamp_offset;
792 [ # # ]: 0 : if (rxq->rt_timestamp) {
793 : : struct mlx5_dev_ctx_shared *sh = rxq->sh;
794 : : uint64_t ts;
795 : :
796 : 0 : ts = rte_be_to_cpu_64(cq[pos].timestamp);
797 : 0 : mlx5_timestamp_set(pkts[pos], offset,
798 : : mlx5_txpp_convert_rx_ts(sh, ts));
799 : 0 : ts = rte_be_to_cpu_64(cq[pos + p1].timestamp);
800 : 0 : mlx5_timestamp_set(pkts[pos + 1], offset,
801 : : mlx5_txpp_convert_rx_ts(sh, ts));
802 : 0 : ts = rte_be_to_cpu_64(cq[pos + p2].timestamp);
803 : 0 : mlx5_timestamp_set(pkts[pos + 2], offset,
804 : : mlx5_txpp_convert_rx_ts(sh, ts));
805 : 0 : ts = rte_be_to_cpu_64(cq[pos + p3].timestamp);
806 : 0 : mlx5_timestamp_set(pkts[pos + 3], offset,
807 : : mlx5_txpp_convert_rx_ts(sh, ts));
808 : : } else {
809 : 0 : mlx5_timestamp_set(pkts[pos], offset,
810 : 0 : rte_be_to_cpu_64(cq[pos].timestamp));
811 : 0 : mlx5_timestamp_set(pkts[pos + 1], offset,
812 : 0 : rte_be_to_cpu_64(cq[pos + p1].timestamp));
813 : 0 : mlx5_timestamp_set(pkts[pos + 2], offset,
814 : 0 : rte_be_to_cpu_64(cq[pos + p2].timestamp));
815 : 0 : mlx5_timestamp_set(pkts[pos + 3], offset,
816 : 0 : rte_be_to_cpu_64(cq[pos + p3].timestamp));
817 : : }
818 : : }
819 [ # # ]: 0 : if (rxq->dynf_meta) {
820 : : /* This code is subject to further optimization. */
821 : 0 : int32_t offs = rxq->flow_meta_offset;
822 : 0 : uint32_t mask = rxq->flow_meta_port_mask;
823 : :
824 : 0 : *RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *) =
825 : 0 : rte_be_to_cpu_32
826 : 0 : (cq[pos].flow_table_metadata) & mask;
827 : 0 : *RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *) =
828 : 0 : rte_be_to_cpu_32
829 : 0 : (cq[pos + p1].flow_table_metadata) & mask;
830 : 0 : *RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *) =
831 : 0 : rte_be_to_cpu_32
832 : 0 : (cq[pos + p2].flow_table_metadata) & mask;
833 : 0 : *RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *) =
834 : 0 : rte_be_to_cpu_32
835 : 0 : (cq[pos + p3].flow_table_metadata) & mask;
836 [ # # ]: 0 : if (*RTE_MBUF_DYNFIELD(pkts[pos], offs, uint32_t *))
837 : 0 : pkts[pos]->ol_flags |= rxq->flow_meta_mask;
838 [ # # ]: 0 : if (*RTE_MBUF_DYNFIELD(pkts[pos + 1], offs, uint32_t *))
839 : 0 : pkts[pos + 1]->ol_flags |= rxq->flow_meta_mask;
840 [ # # ]: 0 : if (*RTE_MBUF_DYNFIELD(pkts[pos + 2], offs, uint32_t *))
841 : 0 : pkts[pos + 2]->ol_flags |= rxq->flow_meta_mask;
842 [ # # ]: 0 : if (*RTE_MBUF_DYNFIELD(pkts[pos + 3], offs, uint32_t *))
843 : 0 : pkts[pos + 3]->ol_flags |= rxq->flow_meta_mask;
844 : : }
845 : : #ifdef MLX5_PMD_SOFT_COUNTERS
846 : : /* Add up received bytes count. */
847 : : byte_cnt = _mm_shuffle_epi8(op_own, len_shuf_mask);
848 : : byte_cnt = _mm_andnot_si128(invalid_mask, byte_cnt);
849 : : byte_cnt = _mm_hadd_epi16(byte_cnt, zero);
850 : 0 : rcvd_byte += _mm_cvtsi128_si64(_mm_hadd_epi16(byte_cnt, zero));
851 : : #endif
852 : : /*
853 : : * Break the loop unless more valid CQEs are expected, or if
854 : : * there's a compressed CQE.
855 : : */
856 [ # # ]: 0 : if (n != MLX5_VPMD_DESCS_PER_LOOP)
857 : : break;
858 : : }
859 : : #ifdef MLX5_PMD_SOFT_COUNTERS
860 : 0 : rxq->stats.ipackets += nocmp_n;
861 : 0 : rxq->stats.ibytes += rcvd_byte;
862 : : #endif
863 [ # # ]: 0 : if (comp_idx == n)
864 : 0 : *comp = comp_idx;
865 : 0 : return nocmp_n;
866 : : }
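
After the 32-bit comparison results are packed to 16-bit lanes, the low 64 bits of a mask hold one lane per CQE, so both comp_idx (first compressed CQE) and n (number of valid non-compressed CQEs) are obtained by counting trailing zero bits and dividing by 16, with MLX5_VPMD_DESCS_PER_LOOP (4) meaning no lane was set in the batch. A scalar sketch of that step, using the same rte_ctz64() helper as the code above (assumed to be declared in <rte_bitops.h> as in current DPDK):

#include <rte_bitops.h>

/* Index of the first set 16-bit lane in a packed 4-lane mask,
 * or 4 (MLX5_VPMD_DESCS_PER_LOOP) when no lane is set. */
static inline unsigned int
first_set_lane(uint64_t lane_mask)
{
	if (lane_mask == 0)
		return 4;
	/* Each CQE occupies one 16-bit lane in the low 64 bits. */
	return rte_ctz64(lane_mask) / 16;
}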
867 : :
868 : : #endif /* RTE_PMD_MLX5_RXTX_VEC_SSE_H_ */
|