Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause 2 : : * Copyright(c) 2017 Intel Corporation 3 : : */ 4 : : 5 : : #include <rte_mbuf.h> 6 : : #include "distributor_private.h" 7 : : #include "smmintrin.h" 8 : : 9 : : 10 : : void 11 : 131210 : find_match_vec(struct rte_distributor *d, 12 : : uint16_t *data_ptr, 13 : : uint16_t *output_ptr) 14 : : { 15 : : /* Setup */ 16 : : __m128i incoming_fids; 17 : : __m128i inflight_fids; 18 : : __m128i preflight_fids; 19 : : __m128i wkr; 20 : : __m128i mask1; 21 : : __m128i mask2; 22 : : __m128i output; 23 : : struct rte_distributor_backlog *bl; 24 : : uint16_t i; 25 : : 26 : : /* 27 : : * Function overview: 28 : : * 2. Loop through all worker ID's 29 : : * 2a. Load the current inflights for that worker into an xmm reg 30 : : * 2b. Load the current backlog for that worker into an xmm reg 31 : : * 2c. use cmpestrm to intersect flow_ids with backlog and inflights 32 : : * 2d. Add any matches to the output 33 : : * 3. Write the output xmm (matching worker ids). 34 : : */ 35 : : 36 : : 37 : : output = _mm_set1_epi16(0); 38 : : incoming_fids = _mm_load_si128((__m128i *)data_ptr); 39 : : 40 [ + + ]: 262420 : for (i = 0; i < d->num_workers; i++) { 41 : 131210 : bl = &d->backlog[i]; 42 : : 43 : : inflight_fids = 44 : 131210 : _mm_load_si128((__m128i *)&(d->in_flight_tags[i])); 45 : : preflight_fids = 46 : 131210 : _mm_load_si128((__m128i *)(bl->tags)); 47 : : 48 : : /* 49 : : * Any incoming_fid that exists anywhere in inflight_fids will 50 : : * have 0xffff in same position of the mask as the incoming fid 51 : : * Example (shortened to bytes for brevity): 52 : : * incoming_fids 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 53 : : * inflight_fids 0x03 0x05 0x07 0x00 0x00 0x00 0x00 0x00 54 : : * mask 0x00 0x00 0xff 0x00 0xff 0x00 0xff 0x00 55 : : */ 56 : : 57 : : mask1 = _mm_cmpestrm(inflight_fids, 8, incoming_fids, 8, 58 : : _SIDD_UWORD_OPS | 59 : : _SIDD_CMP_EQUAL_ANY | 60 : : _SIDD_UNIT_MASK); 61 : : mask2 = _mm_cmpestrm(preflight_fids, 8, incoming_fids, 8, 62 : : _SIDD_UWORD_OPS | 63 : : _SIDD_CMP_EQUAL_ANY | 64 : : _SIDD_UNIT_MASK); 65 : : 66 : : mask1 = _mm_or_si128(mask1, mask2); 67 : : /* 68 : : * Now mask contains 0xffff where there's a match. 69 : : * Next we need to store the worker_id in the relevant position 70 : : * in the output. 71 : : */ 72 : : 73 : 131210 : wkr = _mm_set1_epi16(i+1); 74 : : mask1 = _mm_and_si128(mask1, wkr); 75 : : output = _mm_or_si128(mask1, output); 76 : : } 77 : : 78 : : /* 79 : : * At this stage, the output 128-bit contains 8 16-bit values, with 80 : : * each non-zero value containing the worker ID on which the 81 : : * corresponding flow is pinned to. 82 : : */ 83 : : _mm_store_si128((__m128i *)output_ptr, output); 84 : 131210 : }