Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright(c) 2018 Ericsson AB
3 : : */
4 : :
5 : : #ifndef _DSW_EVDEV_H_
6 : : #define _DSW_EVDEV_H_
7 : :
8 : : #include <eventdev_pmd.h>
9 : :
10 : : #include <rte_bitset.h>
11 : : #include <rte_event_ring.h>
12 : : #include <rte_eventdev.h>
13 : :
14 : : #define DSW_PMD_NAME RTE_STR(event_dsw)
15 : :
16 : : #define DSW_MAX_PORTS (64)
17 : : #define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
18 : : #define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
19 : : #define DSW_MAX_PORT_OUT_BUFFER (32)
20 : :
21 : : #define DSW_MAX_QUEUES (16)
22 : :
23 : : #define DSW_MAX_EVENTS (16384)
24 : :
25 : : /* Multiple 24-bit flow ids will map to the same DSW-level flow. The
 * number of DSW flows should be high enough to make it unlikely that
27 : : * flow ids of several large flows hash to the same DSW-level flow.
28 : : * Such collisions will limit parallelism and thus the number of cores
29 : : * that may be utilized. However, configuring a large number of DSW
30 : : * flows might potentially, depending on traffic and actual
31 : : * application flow id value range, result in each such DSW-level flow
32 : : * being very small. The effect of migrating such flows will be small,
 * in terms of the amount of processing load redistributed. This will in turn
34 : : * reduce the load balancing speed, since flow migration rate has an
35 : : * upper limit. Code changes are required to allow > 32k DSW-level
36 : : * flows.
37 : : */
38 : : #define DSW_MAX_FLOWS_BITS (13)
39 : : #define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
40 : : #define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
41 : :
/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns flow ids to
 * events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" from a
 * particular port, the likelihood of events being scheduled to this
 * port is reduced, and thus a kind of statistical load balancing is
 * achieved.
 */
51 : : #define DSW_PARALLEL_FLOWS (1024)
52 : :
/* 'Background tasks' are polling the control rings for
 * migration-related messages, or flushing the output buffer (so
 * buffered events don't linger too long). Shouldn't be too low,
 * since the system won't benefit from the 'batching' effects from
 * the output buffer, and shouldn't be too high, since it will make
 * buffered events linger too long in case the port goes idle.
 */
60 : : #define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
61 : :
62 : : /* Avoid making small 'loans' from the central in-flight event credit
63 : : * pool, to improve efficiency.
64 : : */
65 : : #define DSW_MIN_CREDIT_LOAN (64)
66 : : #define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
67 : : #define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
68 : :
69 : : /* The rings are dimensioned so that all in-flight events can reside
70 : : * on any one of the port rings, to avoid the trouble of having to
71 : : * care about the case where there's no room on the destination port's
72 : : * input ring.
73 : : */
74 : : #define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
75 : :
#define DSW_MAX_LOAD (INT16_MAX)
/* Convert a percentage [0, 100] to an internal int16_t load value. */
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
/* Convert an internal load value back to a percentage. The argument
 * is parenthesized so that expression arguments (e.g., 'a + b' or a
 * ternary) expand correctly.
 */
#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
79 : :
80 : : /* The thought behind keeping the load update interval shorter than
81 : : * the migration interval is that the load from newly migrated flows
82 : : * should 'show up' on the load measurement before new migrations are
83 : : * considered. This is to avoid having too many flows, from too many
84 : : * source ports, to be migrated too quickly to a lightly loaded port -
85 : : * in particular since this might cause the system to oscillate.
86 : : */
87 : : #define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
88 : : #define DSW_OLD_LOAD_WEIGHT (1)
89 : :
90 : : /* The minimum time (in us) between two flow migrations. What puts an
91 : : * upper limit on the actual migration rate is primarily the pace in
92 : : * which the ports send and receive control messages, which in turn is
 * largely a function of how many cycles are spent on the processing of
94 : : * an event burst.
95 : : */
96 : : #define DSW_MIGRATION_INTERVAL (1000)
97 : : #define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
98 : : #define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
99 : : #define DSW_REBALANCE_THRESHOLD (DSW_LOAD_FROM_PERCENT(3))
100 : :
101 : : #define DSW_MAX_EVENTS_RECORDED (128)
102 : :
103 : : #define DSW_MAX_FLOWS_PER_MIGRATION (8)
104 : :
105 : : /* Only one outstanding migration per port is allowed */
106 : : #define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS*DSW_MAX_FLOWS_PER_MIGRATION)
107 : :
/* Enough room for pause request/confirm and unpause request/confirm for
109 : : * all possible senders.
110 : : */
111 : : #define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
112 : :
113 : : /* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
114 : : * dequeue(), arrange events so that events with the same flow id on
115 : : * the same queue forms a back-to-back "burst", and also so that such
116 : : * bursts of different flow ids, but on the same queue, also come
117 : : * consecutively. All this in an attempt to improve data and
118 : : * instruction cache usage for the application, at the cost of a
119 : : * scheduler overhead increase.
120 : : */
121 : :
122 : : /* #define DSW_SORT_DEQUEUED */
123 : :
/* Identifies a DSW-level flow: a particular flow hash on a particular
 * event queue.
 */
struct dsw_queue_flow {
	uint8_t queue_id;
	uint16_t flow_hash;
};
128 : :
/* Per-port state machine for the flow emigration protocol. Only one
 * migration per port may be outstanding at a time (see
 * DSW_MAX_PAUSED_FLOWS).
 */
enum dsw_migration_state {
	DSW_MIGRATION_STATE_IDLE,
	DSW_MIGRATION_STATE_FINISH_PENDING,
	DSW_MIGRATION_STATE_PAUSING,
	DSW_MIGRATION_STATE_UNPAUSING
};
135 : :
/* Per-eventdev-port state. Cache-line aligned to avoid false sharing
 * between ports.
 */
struct __rte_cache_aligned dsw_port {
	/* Port id; also the index into the dsw_evdev port array. */
	uint16_t id;

	/* Keeping a pointer here to avoid container_of() calls, which
	 * are expensive since they are very frequent and will result
	 * in an integer multiplication (since the port id is an index
	 * into the dsw_evdev port array).
	 */
	struct dsw_evdev *dsw;

	uint16_t dequeue_depth;
	uint16_t enqueue_depth;

	/* Credits currently held by this port, loaned from the
	 * device-level pool (see dsw_evdev credits_on_loan and
	 * DSW_MIN_CREDIT_LOAN).
	 */
	int32_t inflight_credits;

	int32_t new_event_threshold;

	bool implicit_release;

	uint16_t pending_releases;

	/* Next flow id to hand out for events on parallel queues
	 * (see DSW_PARALLEL_FLOWS). NOTE(review): inferred from the
	 * name - confirm against the enqueue path.
	 */
	uint16_t next_parallel_flow_id;

	/* Port operations since the last background task run (see
	 * DSW_MAX_PORT_OPS_PER_BG_TASK).
	 */
	uint16_t ops_since_bg_task;

	/* most recent 'background' processing */
	uint64_t last_bg;

	/* For port load measurement. */
	uint64_t next_load_update;
	uint64_t load_update_interval;
	uint64_t measurement_start;
	uint64_t busy_start;
	uint64_t busy_cycles;
	uint64_t total_busy_cycles;

	/* For the ctl interface and flow migration mechanism. */
	uint64_t next_emigration;
	uint64_t migration_interval;
	enum dsw_migration_state migration_state;

	uint64_t emigration_start;
	uint64_t emigrations;
	uint64_t emigration_latency;

	/* Target port for each flow in the in-progress emigration;
	 * parallel to emigration_target_qfs.
	 */
	uint8_t emigration_target_port_ids[DSW_MAX_FLOWS_PER_MIGRATION];
	struct dsw_queue_flow
		emigration_target_qfs[DSW_MAX_FLOWS_PER_MIGRATION];
	uint8_t emigration_targets_len;
	/* Confirmation (DSW_CTL_CFM) messages received so far. */
	uint8_t cfm_cnt;

	uint64_t immigrations;

	uint16_t paused_flows_len;
	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];

	/* In a very contrived worst case all inflight events can be
	 * laying around paused here.
	 */
	uint16_t paused_events_len;
	struct rte_event paused_events[DSW_MAX_EVENTS];

	uint16_t emigrating_events_len;
	/* Buffer for not-yet-processed events pertaining to a flow
	 * emigrating from this port. These events will be forwarded
	 * to the target port.
	 */
	struct rte_event emigrating_events[DSW_MAX_EVENTS];

	/* Record of recently dequeued (queue, flow) pairs (at most
	 * DSW_MAX_EVENTS_RECORDED entries). NOTE(review): presumably
	 * input for migration decisions - confirm in dsw_event.c.
	 */
	uint16_t seen_events_len;
	uint16_t seen_events_idx;
	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];

	/* Enqueue-side statistics. */
	uint64_t enqueue_calls;
	uint64_t new_enqueued;
	uint64_t forward_enqueued;
	uint64_t release_enqueued;
	uint64_t queue_enqueued[DSW_MAX_QUEUES];

	/* Dequeue-side statistics. */
	uint64_t dequeue_calls;
	uint64_t dequeued;
	uint64_t queue_dequeued[DSW_MAX_QUEUES];

	/* Per-destination-port buffers of not-yet-sent events (see
	 * DSW_MAX_PORT_OUT_BUFFER).
	 */
	uint16_t out_buffer_len[DSW_MAX_PORTS];
	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];

	uint16_t in_buffer_len;
	uint16_t in_buffer_start;
	/* This buffer may contain events that were read up from the
	 * in_ring during the flow migration process.
	 */
	struct rte_event in_buffer[DSW_MAX_EVENTS];

	/* This port's input event ring (see DSW_IN_RING_SIZE). */
	alignas(RTE_CACHE_LINE_SIZE) struct rte_event_ring *in_ring;

	/* Ring for incoming control messages (see dsw_ctl_msg and
	 * DSW_CTL_IN_RING_SIZE).
	 */
	alignas(RTE_CACHE_LINE_SIZE) struct rte_ring *ctl_in_ring;

	/* Estimate of current port load. */
	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int16_t) load;
	/* Estimate of flows currently migrating to this port. */
	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) immigration_load;
};
238 : :
/* Per-event-queue state. */
struct dsw_queue {
	uint8_t schedule_type;
	/* Bitset of the ports linked to (serving) this queue. */
	RTE_BITSET_DECLARE(serving_ports, DSW_MAX_PORTS);
	uint16_t num_serving_ports;

	/* Maps a DSW-level flow hash to the port to which that flow
	 * is currently assigned.
	 */
	alignas(RTE_CACHE_LINE_SIZE) uint8_t flow_to_port_map[DSW_MAX_FLOWS];
};
246 : :
247 : : /* Limited by the size of the 'serving_ports' bitmask */
248 : : static_assert(DSW_MAX_PORTS <= 64, "Max compile-time port count exceeded");
249 : :
/* Device-level private state: the ports, the queues and the central
 * in-flight event credit pool.
 */
struct dsw_evdev {
	struct rte_eventdev_data *data;

	struct dsw_port ports[DSW_MAX_PORTS];
	uint16_t num_ports;
	struct dsw_queue queues[DSW_MAX_QUEUES];
	uint8_t num_queues;
	/* Upper bound on in-flight events (see DSW_MAX_EVENTS). */
	int32_t max_inflight;

	/* Total credits currently loaned out to the ports. On its own
	 * cache line, since it is shared among all ports.
	 */
	alignas(RTE_CACHE_LINE_SIZE) RTE_ATOMIC(int32_t) credits_on_loan;
};
261 : :
262 : : #define DSW_CTL_PAUSE_REQ (0)
263 : : #define DSW_CTL_UNPAUSE_REQ (1)
264 : : #define DSW_CTL_CFM (2)
265 : :
/* Control message exchanged between ports (over the ctl_in_ring) as
 * part of the flow migration protocol.
 */
struct __rte_aligned(4) dsw_ctl_msg {
	/* One of DSW_CTL_PAUSE_REQ, DSW_CTL_UNPAUSE_REQ, DSW_CTL_CFM. */
	uint8_t type;
	uint8_t originating_port_id;
	/* Number of valid entries in qfs. */
	uint8_t qfs_len;
	/* The (queue, flow) pairs this message pertains to. */
	struct dsw_queue_flow qfs[DSW_MAX_FLOWS_PER_MIGRATION];
};
272 : :
273 : : uint16_t dsw_event_enqueue_burst(void *port,
274 : : const struct rte_event events[],
275 : : uint16_t events_len);
276 : : uint16_t dsw_event_enqueue_new_burst(void *port,
277 : : const struct rte_event events[],
278 : : uint16_t events_len);
279 : : uint16_t dsw_event_enqueue_forward_burst(void *port,
280 : : const struct rte_event events[],
281 : : uint16_t events_len);
282 : :
283 : : uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
284 : : uint16_t num, uint64_t wait);
285 : : void dsw_event_maintain(void *port, int op);
286 : :
287 : : int dsw_xstats_get_names(const struct rte_eventdev *dev,
288 : : enum rte_event_dev_xstats_mode mode,
289 : : uint8_t queue_port_id,
290 : : struct rte_event_dev_xstats_name *xstats_names,
291 : : uint64_t *ids, unsigned int size);
292 : : int dsw_xstats_get(const struct rte_eventdev *dev,
293 : : enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
294 : : const uint64_t ids[], uint64_t values[], unsigned int n);
295 : : uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
296 : : const char *name, uint64_t *id);
297 : :
298 : : static inline struct dsw_evdev *
299 : : dsw_pmd_priv(const struct rte_eventdev *eventdev)
300 : : {
301 [ # # # # ]: 0 : return eventdev->data->dev_private;
302 : : }
303 : :
304 : : extern int event_dsw_logtype;
305 : : #define RTE_LOGTYPE_EVENT_DSW event_dsw_logtype
306 : : #define DSW_LOG_DP_LINE(level, fmt, args...) \
307 : : RTE_LOG_DP_LINE(level, EVENT_DSW, "%s() line %u: " fmt, \
308 : : __func__, __LINE__, ## args)
309 : :
310 : : #define DSW_LOG_DP_PORT_LINE(level, port_id, fmt, args...) \
311 : : DSW_LOG_DP_LINE(level, "<Port %d> " fmt, port_id, ## args)
312 : :
313 : : #endif
|