Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright(c) 2010-2014 Intel Corporation
3 : : */
4 : : #include <stdint.h>
5 : : #include <stddef.h>
6 : : #include <stdlib.h>
7 : : #include <stdio.h>
8 : : #include <errno.h>
9 : : #include <sys/queue.h>
10 : :
11 : : #include <rte_memory.h>
12 : : #include <rte_errno.h>
13 : : #include <rte_eal.h>
14 : : #include <rte_eal_memconfig.h>
15 : : #include <rte_lcore.h>
16 : : #include <rte_common.h>
17 : : #include <rte_string_fns.h>
18 : : #include <rte_spinlock.h>
19 : : #include <rte_memzone.h>
20 : : #include <rte_fbarray.h>
21 : :
22 : : #include "eal_internal_cfg.h"
23 : : #include "eal_memalloc.h"
24 : : #include "eal_memcfg.h"
25 : : #include "eal_private.h"
26 : : #include "malloc_elem.h"
27 : : #include "malloc_heap.h"
28 : : #include "malloc_mp.h"
29 : :
30 : : /* start external socket IDs at a very high number */
31 : : #define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */
32 : : #define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))
33 : :
34 : : static unsigned
35 : 118632 : check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
36 : : {
37 : : unsigned check_flag = 0;
38 : :
39 [ + + ]: 118632 : if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
40 : : return 1;
41 : :
42 [ - + - - - - - - + ]: 1366 : switch (hugepage_sz) {
43 : 0 : case RTE_PGSIZE_256K:
44 : : check_flag = RTE_MEMZONE_256KB;
45 : 0 : break;
46 : 1281 : case RTE_PGSIZE_2M:
47 : : check_flag = RTE_MEMZONE_2MB;
48 : 1281 : break;
49 : 0 : case RTE_PGSIZE_16M:
50 : : check_flag = RTE_MEMZONE_16MB;
51 : 0 : break;
52 : 0 : case RTE_PGSIZE_256M:
53 : : check_flag = RTE_MEMZONE_256MB;
54 : 0 : break;
55 : 0 : case RTE_PGSIZE_512M:
56 : : check_flag = RTE_MEMZONE_512MB;
57 : 0 : break;
58 : 0 : case RTE_PGSIZE_1G:
59 : : check_flag = RTE_MEMZONE_1GB;
60 : 0 : break;
61 : 0 : case RTE_PGSIZE_4G:
62 : : check_flag = RTE_MEMZONE_4GB;
63 : 0 : break;
64 : 0 : case RTE_PGSIZE_16G:
65 : : check_flag = RTE_MEMZONE_16GB;
66 : : }
67 : :
68 : 1366 : return check_flag & flags;
69 : : }
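
      The RTE_MEMZONE_*B flags tested above come in through the public memzone API.
      A minimal sketch of a caller asking for 2 MB pages while accepting any page size
      as a fallback (the zone name and the 1 MB length are illustrative assumptions):

          #include <rte_memzone.h>
          #include <rte_lcore.h>

          const struct rte_memzone *mz = rte_memzone_reserve("example_mz", 1 << 20,
                          rte_socket_id(),
                          RTE_MEMZONE_2MB | RTE_MEMZONE_SIZE_HINT_ONLY);
          /* with SIZE_HINT_ONLY set, check_hugepage_sz() lets the allocator fall
           * back to an element backed by a different page size instead of failing */
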
70 : :
71 : : int
72 : 117177 : malloc_socket_to_heap_id(unsigned int socket_id)
73 : : {
74 : 117177 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
75 : : int i;
76 : :
77 [ + + ]: 118199 : for (i = 0; i < RTE_MAX_HEAPS; i++) {
78 : : struct malloc_heap *heap = &mcfg->malloc_heaps[i];
79 : :
80 [ + + ]: 118168 : if (heap->socket_id == socket_id)
81 : 117146 : return i;
82 : : }
83 : : return -1;
84 : : }
85 : :
86 : : /*
87 : : * Expand the heap with a memory area.
88 : : */
89 : : static struct malloc_elem *
90 : 597 : malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
91 : : void *start, size_t len, bool dirty)
92 : : {
93 : : struct malloc_elem *elem = start;
94 : :
95 : 597 : malloc_elem_init(elem, heap, msl, len, elem, len, dirty);
96 : :
97 : 597 : malloc_elem_insert(elem);
98 : :
99 : 597 : elem = malloc_elem_join_adjacent_free(elem);
100 : :
101 : 597 : malloc_elem_free_list_insert(elem);
102 : :
103 : 597 : return elem;
104 : : }
105 : :
106 : : static int
107 : 113 : malloc_add_seg(const struct rte_memseg_list *msl,
108 : : const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
109 : : {
110 : 113 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
111 : : struct rte_memseg_list *found_msl;
112 : : struct malloc_heap *heap;
113 : : int msl_idx, heap_idx;
114 : :
115 [ + - ]: 113 : if (msl->external)
116 : : return 0;
117 : :
118 : 113 : heap_idx = malloc_socket_to_heap_id(msl->socket_id);
119 [ - + ]: 113 : if (heap_idx < 0) {
120 : 0 : EAL_LOG(ERR, "Memseg list has invalid socket id");
121 : 0 : return -1;
122 : : }
123 : 113 : heap = &mcfg->malloc_heaps[heap_idx];
124 : :
125 : : /* msl is const, so find it */
126 : 113 : msl_idx = msl - mcfg->memsegs;
127 : :
128 [ + - ]: 113 : if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
129 : : return -1;
130 : :
131 : 113 : found_msl = &mcfg->memsegs[msl_idx];
132 : :
133 : 113 : malloc_heap_add_memory(heap, found_msl, ms->addr, len,
134 : 113 : ms->flags & RTE_MEMSEG_FLAG_DIRTY);
135 : :
136 : 113 : heap->total_size += len;
137 : :
138 : 113 : EAL_LOG(DEBUG, "Added %zuM to heap on socket %i", len >> 20,
139 : : msl->socket_id);
140 : 113 : return 0;
141 : : }
142 : :
143 : : /*
144 : : * Iterates through the freelist for a heap to find a free element
145 : : * which can store data of the required size and with the requested alignment.
146 : : * If size is 0, find the biggest available elem.
147 : : * Returns null on failure, or pointer to element on success.
148 : : */
149 : : static struct malloc_elem *
150 : 118698 : find_suitable_element(struct malloc_heap *heap, size_t size,
151 : : unsigned int flags, size_t align, size_t bound, bool contig)
152 : : {
153 : : size_t idx;
154 : : struct malloc_elem *elem, *alt_elem = NULL;
155 : :
156 : 118698 : for (idx = malloc_elem_free_list_index(size);
157 [ + + ]: 554686 : idx < RTE_HEAP_NUM_FREELISTS; idx++) {
158 : 553253 : for (elem = LIST_FIRST(&heap->free_head[idx]);
159 [ + + ]: 555855 : !!elem; elem = LIST_NEXT(elem, free_list)) {
160 [ + + ]: 119867 : if (malloc_elem_can_hold(elem, size, align, bound,
161 : : contig)) {
162 : 118084 : if (check_hugepage_sz(flags,
163 [ + + ]: 118084 : elem->msl->page_sz))
164 : 117265 : return elem;
165 [ + + ]: 819 : if (alt_elem == NULL)
166 : : alt_elem = elem;
167 : : }
168 : : }
169 : : }
170 : :
171 [ + + ]: 1433 : if (flags & RTE_MEMZONE_SIZE_HINT_ONLY)
172 : 373 : return alt_elem;
173 : :
174 : : return NULL;
175 : : }
176 : :
177 : : /*
178 : : * Iterates through the freelist for a heap to find a free element with the
179 : : * biggest size and requested alignment. Will also set size to whatever element
180 : : * size that was found.
181 : : * Returns null on failure, or pointer to element on success.
182 : : */
183 : : static struct malloc_elem *
184 : 3 : find_biggest_element(struct malloc_heap *heap, size_t *size,
185 : : unsigned int flags, size_t align, bool contig)
186 : : {
187 : : struct malloc_elem *elem, *max_elem = NULL;
188 : : size_t idx, max_size = 0;
189 : :
190 [ + + ]: 42 : for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
191 : 39 : for (elem = LIST_FIRST(&heap->free_head[idx]);
192 [ + + ]: 43 : !!elem; elem = LIST_NEXT(elem, free_list)) {
193 : : size_t cur_size;
194 [ + - ]: 4 : if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
195 : 4 : !check_hugepage_sz(flags,
196 [ - + ]: 4 : elem->msl->page_sz))
197 : 0 : continue;
198 [ - + ]: 4 : if (contig) {
199 : : cur_size =
200 : 0 : malloc_elem_find_max_iova_contig(elem,
201 : : align);
202 : : } else {
203 : 4 : void *data_start = RTE_PTR_ADD(elem,
204 : : MALLOC_ELEM_HEADER_LEN);
205 : 4 : void *data_end = RTE_PTR_ADD(elem, elem->size -
206 : : MALLOC_ELEM_TRAILER_LEN);
207 : 4 : void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
208 : : align);
209 : : /* check if aligned data start is beyond end */
210 [ - + ]: 4 : if (aligned >= data_end)
211 : 0 : continue;
212 : 4 : cur_size = RTE_PTR_DIFF(data_end, aligned);
213 : : }
214 [ + - ]: 4 : if (cur_size > max_size) {
215 : : max_size = cur_size;
216 : : max_elem = elem;
217 : : }
218 : : }
219 : : }
220 : :
221 : 3 : *size = max_size;
222 : 3 : return max_elem;
223 : : }
224 : :
225 : : /*
226 : : * Main function to allocate a block of memory from the heap.
227 : : * It locks the free list, scans it, and adds a new memseg if the
228 : : * scan fails. Once the new memseg is added, it re-scans and should return
229 : : * the new element after releasing the lock.
230 : : */
231 : : static void *
232 : 117621 : heap_alloc(struct malloc_heap *heap, size_t size, unsigned int flags,
233 : : size_t align, size_t bound, bool contig)
234 : : {
235 : : struct malloc_elem *elem;
236 : : size_t user_size = size;
237 : :
238 : 117621 : size = RTE_CACHE_LINE_ROUNDUP(size);
239 : 117621 : align = RTE_CACHE_LINE_ROUNDUP(align);
240 : :
241 : : /* roundup might cause an overflow */
242 [ + - ]: 117621 : if (size == 0)
243 : : return NULL;
244 : 117621 : elem = find_suitable_element(heap, size, flags, align, bound, contig);
245 [ + + ]: 117621 : if (elem != NULL) {
246 : 117021 : elem = malloc_elem_alloc(elem, size, align, bound, contig);
247 : :
248 : : /* increase heap's count of allocated elements */
249 : 117021 : heap->alloc_count++;
250 : :
251 : : asan_set_redzone(elem, user_size);
252 : : }
253 : :
254 [ + + ]: 117621 : return elem == NULL ? NULL : (void *)(&elem[1]);
255 : : }
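
      Both size and alignment are rounded up to cache-line multiples before the free
      lists are searched. A worked sketch, assuming RTE_CACHE_LINE_SIZE is 64:

          size_t size  = RTE_CACHE_LINE_ROUNDUP(100); /* becomes 128 */
          size_t align = RTE_CACHE_LINE_ROUNDUP(8);   /* becomes 64  */
          /* a request for 100 bytes with 8-byte alignment is therefore served by a
           * 128-byte, cache-line-aligned element (plus the malloc element overhead) */
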
256 : :
257 : : static void *
258 : 3 : heap_alloc_biggest(struct malloc_heap *heap, unsigned int flags, size_t align, bool contig)
259 : : {
260 : : struct malloc_elem *elem;
261 : : size_t size;
262 : :
263 : 3 : align = RTE_CACHE_LINE_ROUNDUP(align);
264 : :
265 : 3 : elem = find_biggest_element(heap, &size, flags, align, contig);
266 [ + - ]: 3 : if (elem != NULL) {
267 : 3 : elem = malloc_elem_alloc(elem, size, align, 0, contig);
268 : :
269 : : /* increase heap's count of allocated elements */
270 : 3 : heap->alloc_count++;
271 : :
272 : : asan_set_redzone(elem, size);
273 : : }
274 : :
275 [ + - ]: 3 : return elem == NULL ? NULL : (void *)(&elem[1]);
276 : : }
277 : :
278 : : /* this function is exposed in malloc_mp.h */
279 : : void
280 : 0 : rollback_expand_heap(struct rte_memseg **ms, int n_segs,
281 : : struct malloc_elem *elem, void *map_addr, size_t map_len)
282 : : {
283 [ # # ]: 0 : if (elem != NULL) {
284 : 0 : malloc_elem_free_list_remove(elem);
285 : 0 : malloc_elem_hide_region(elem, map_addr, map_len);
286 : : }
287 : :
288 : 0 : eal_memalloc_free_seg_bulk(ms, n_segs);
289 : 0 : }
290 : :
291 : : /* this function is exposed in malloc_mp.h */
292 : : struct malloc_elem *
293 : 483 : alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
294 : : int socket, unsigned int flags, size_t align, size_t bound,
295 : : bool contig, struct rte_memseg **ms, int n_segs)
296 : : {
297 : 483 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
298 : : struct rte_memseg_list *msl;
299 : : struct malloc_elem *elem = NULL;
300 : : size_t alloc_sz;
301 : : int allocd_pages, i;
302 : : bool dirty = false;
303 : : void *ret, *map_addr;
304 : :
305 : 483 : alloc_sz = (size_t)pg_sz * n_segs;
306 : :
307 : : /* first, check if we're allowed to allocate this memory */
308 [ - + ]: 483 : if (eal_memalloc_mem_alloc_validate(socket,
309 : 483 : heap->total_size + alloc_sz) < 0) {
310 : 0 : EAL_LOG(DEBUG, "User has disallowed allocation");
311 : 0 : return NULL;
312 : : }
313 : :
314 : 483 : allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
315 : : socket, true);
316 : :
317 : : /* make sure we've allocated our pages... */
318 [ + - ]: 483 : if (allocd_pages < 0)
319 : : return NULL;
320 : :
321 : 483 : map_addr = ms[0]->addr;
322 : 483 : msl = rte_mem_virt2memseg_list(map_addr);
323 : :
324 : : /* check if we wanted contiguous memory but didn't get it */
325 [ - + - - ]: 483 : if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
326 : 0 : EAL_LOG(DEBUG, "%s(): couldn't allocate physically contiguous space",
327 : : __func__);
328 : 0 : goto fail;
329 : : }
330 : :
331 : : /*
332 : : * Once we have all the memseg lists configured, if there is a dma mask
333 : : * set, check iova addresses are not out of range. Otherwise the device
334 : : * setting the dma mask could have problems with the mapped memory.
335 : : *
336 : : * There are two situations when this can happen:
337 : : * 1) memory initialization
338 : : * 2) dynamic memory allocation
339 : : *
340 : : * For 1), an error when checking the DMA mask means the app cannot be
341 : : * executed. For 2), it means the new memory cannot be added.
342 : : */
343 [ - + - - ]: 483 : if (mcfg->dma_maskbits &&
344 : 0 : rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
345 : : /*
346 : : * Currently this can only happen if IOMMU is enabled
347 : : * and the address width supported by the IOMMU hw is
348 : : * not enough for using the memory mapped IOVAs.
349 : : *
350 : : * If IOVA is VA, advise to try with '--iova-mode pa'
351 : : * which could solve some situations when IOVA VA is not
352 : : * really needed.
353 : : */
354 : 0 : EAL_LOG(ERR,
355 : : "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask",
356 : : __func__);
357 : :
358 : : /*
359 : : * If IOVA is VA and it is possible to run with IOVA PA,
360 : : * because the user is root, give advice for solving the
361 : : * problem.
362 : : */
363 [ # # # # ]: 0 : if ((rte_eal_iova_mode() == RTE_IOVA_VA) &&
364 : 0 : rte_eal_using_phys_addrs())
365 : 0 : EAL_LOG(ERR,
366 : : "%s(): Please try initializing EAL with --iova-mode=pa parameter",
367 : : __func__);
368 : 0 : goto fail;
369 : : }
370 : :
371 : : /* Element is dirty if it contains at least one dirty page. */
372 [ + + ]: 1434 : for (i = 0; i < allocd_pages; i++)
373 : 951 : dirty |= ms[i]->flags & RTE_MEMSEG_FLAG_DIRTY;
374 : :
375 : : /* add newly minted memsegs to malloc heap */
376 : 483 : elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz, dirty);
377 : :
378 : : /* try once more, as now we have allocated new memory */
379 : 483 : ret = find_suitable_element(heap, elt_size, flags, align, bound,
380 : : contig);
381 : :
382 [ - + ]: 483 : if (ret == NULL)
383 : 0 : goto fail;
384 : :
385 : : return elem;
386 : :
387 : 0 : fail:
388 : 0 : rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
389 : 0 : return NULL;
390 : : }
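
      The DMA mask validation above has a public counterpart that applications can call
      after EAL initialization. A hedged sketch, assuming a device limited to 48 IOVA
      bits (the width is an example value only):

          #include <stdio.h>
          #include <rte_memory.h>

          /* returns 0 when every mapped IOVA fits within 48 bits */
          if (rte_mem_check_dma_mask(48) != 0)
                  printf("some mapped memory exceeds the 48-bit DMA mask\n");
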
391 : :
392 : : static int
393 : 482 : try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
394 : : size_t elt_size, int socket, unsigned int flags, size_t align,
395 : : size_t bound, bool contig)
396 : : {
397 : : struct malloc_elem *elem;
398 : : struct rte_memseg **ms;
399 : : void *map_addr;
400 : : size_t alloc_sz;
401 : : int n_segs;
402 : : bool callback_triggered = false;
403 : :
404 : 482 : alloc_sz = RTE_ALIGN_CEIL(RTE_ALIGN_CEIL(elt_size, align) +
405 : : MALLOC_ELEM_OVERHEAD, pg_sz);
406 : 482 : n_segs = alloc_sz / pg_sz;
407 : :
408 : : /* we can't know in advance how many pages we'll need, so we malloc */
409 : 482 : ms = malloc(sizeof(*ms) * n_segs);
410 [ + - ]: 482 : if (ms == NULL)
411 : : return -1;
412 : : memset(ms, 0, sizeof(*ms) * n_segs);
413 : :
414 : 482 : elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
415 : : bound, contig, ms, n_segs);
416 : :
417 [ - + ]: 482 : if (elem == NULL)
418 : 0 : goto free_ms;
419 : :
420 : 482 : map_addr = ms[0]->addr;
421 : :
422 : : /* notify user about changes in memory map */
423 : 482 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
424 : :
425 : : /* notify other processes that this has happened */
426 [ - + ]: 482 : if (request_sync()) {
427 : : /* we couldn't ensure all processes have mapped memory,
428 : : * so free it back and notify everyone that it's been
429 : : * freed back.
430 : : *
431 : : * technically, we could've avoided adding memory addresses to
432 : : * the map, but that would've led to inconsistent behavior
433 : : * between primary and secondary processes, as those get
434 : : * callbacks during sync. therefore, force primary process to
435 : : * do alloc-and-rollback syncs as well.
436 : : */
437 : : callback_triggered = true;
438 : 0 : goto free_elem;
439 : : }
440 : 482 : heap->total_size += alloc_sz;
441 : :
442 : 482 : EAL_LOG(DEBUG, "Heap on socket %d was expanded by %zdMB",
443 : : socket, alloc_sz >> 20ULL);
444 : :
445 : 482 : free(ms);
446 : :
447 : 482 : return 0;
448 : :
449 : : free_elem:
450 : : if (callback_triggered)
451 : 0 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
452 : : map_addr, alloc_sz);
453 : :
454 : 0 : rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
455 : :
456 : 0 : request_sync();
457 : 0 : free_ms:
458 : 0 : free(ms);
459 : :
460 : 0 : return -1;
461 : : }
462 : :
463 : : static int
464 : 1 : try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
465 : : size_t elt_size, int socket, unsigned int flags, size_t align,
466 : : size_t bound, bool contig)
467 : : {
468 : 1 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
469 : : struct malloc_mp_req req;
470 : : int req_result;
471 : :
472 : : memset(&req, 0, sizeof(req));
473 : :
474 : : req.t = REQ_TYPE_ALLOC;
475 : 1 : req.alloc_req.align = align;
476 : 1 : req.alloc_req.bound = bound;
477 : 1 : req.alloc_req.contig = contig;
478 : 1 : req.alloc_req.flags = flags;
479 : 1 : req.alloc_req.elt_size = elt_size;
480 : 1 : req.alloc_req.page_sz = pg_sz;
481 : 1 : req.alloc_req.socket = socket;
482 : 1 : req.alloc_req.malloc_heap_idx = heap - mcfg->malloc_heaps;
483 : :
484 : 1 : req_result = request_to_primary(&req);
485 : :
486 [ + - ]: 1 : if (req_result != 0)
487 : : return -1;
488 : :
489 [ - + ]: 1 : if (req.result != REQ_RESULT_SUCCESS)
490 : 0 : return -1;
491 : :
492 : : return 0;
493 : : }
494 : :
495 : : static int
496 : 483 : try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
497 : : int socket, unsigned int flags, size_t align, size_t bound,
498 : : bool contig)
499 : : {
500 : : int ret;
501 : :
502 : 483 : rte_mcfg_mem_write_lock();
503 : :
504 [ + + ]: 483 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
505 : 482 : ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
506 : : flags, align, bound, contig);
507 : : } else {
508 : 1 : ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
509 : : flags, align, bound, contig);
510 : : }
511 : :
512 : 483 : rte_mcfg_mem_write_unlock();
513 : 483 : return ret;
514 : : }
515 : :
516 : : static int
517 : 2376 : compare_pagesz(const void *a, const void *b)
518 : : {
519 : : const struct rte_memseg_list * const*mpa = a;
520 : : const struct rte_memseg_list * const*mpb = b;
521 : 2376 : const struct rte_memseg_list *msla = *mpa;
522 : 2376 : const struct rte_memseg_list *mslb = *mpb;
523 : 2376 : uint64_t pg_sz_a = msla->page_sz;
524 : 2376 : uint64_t pg_sz_b = mslb->page_sz;
525 : :
526 [ + - ]: 2376 : if (pg_sz_a < pg_sz_b)
527 : : return -1;
528 [ - + ]: 2376 : if (pg_sz_a > pg_sz_b)
529 : 0 : return 1;
530 : : return 0;
531 : : }
532 : :
533 : : static int
534 : 596 : alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
535 : : unsigned int flags, size_t align, size_t bound, bool contig)
536 : : {
537 : 596 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
538 : : struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
539 : : struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
540 : : uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
541 : : uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
542 : : uint64_t prev_pg_sz;
543 : : int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
544 : 596 : bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
545 : 596 : unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
546 : : void *ret;
547 : :
548 : : memset(requested_msls, 0, sizeof(requested_msls));
549 : : memset(other_msls, 0, sizeof(other_msls));
550 : : memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
551 : : memset(other_pg_sz, 0, sizeof(other_pg_sz));
552 : :
553 : : /*
554 : : * go through memseg list and take note of all the page sizes available,
555 : : * and if any of them were specifically requested by the user.
556 : : */
557 : : n_requested_msls = 0;
558 : : n_other_msls = 0;
559 [ + + ]: 76884 : for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
560 : 76288 : struct rte_memseg_list *msl = &mcfg->memsegs[i];
561 : :
562 [ + + ]: 76288 : if (msl->socket_id != socket)
563 : 3104 : continue;
564 : :
565 [ + + ]: 73184 : if (msl->base_va == NULL)
566 : 70800 : continue;
567 : :
568 : : /* if pages of specific size were requested */
569 [ + + - + ]: 2384 : if (size_flags != 0 && check_hugepage_sz(size_flags,
570 : : msl->page_sz))
571 : 0 : requested_msls[n_requested_msls++] = msl;
572 [ + + ]: 2384 : else if (size_flags == 0 || size_hint)
573 : 2376 : other_msls[n_other_msls++] = msl;
574 : : }
575 : :
576 : : /* sort the lists, smallest first */
577 : 596 : qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
578 : : compare_pagesz);
579 : 596 : qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
580 : : compare_pagesz);
581 : :
582 : : /* now, extract page sizes we are supposed to try */
583 : : prev_pg_sz = 0;
584 : : n_requested_pg_sz = 0;
585 [ - + ]: 596 : for (i = 0; i < n_requested_msls; i++) {
586 : 0 : uint64_t pg_sz = requested_msls[i]->page_sz;
587 : :
588 [ # # ]: 0 : if (prev_pg_sz != pg_sz) {
589 : 0 : requested_pg_sz[n_requested_pg_sz++] = pg_sz;
590 : : prev_pg_sz = pg_sz;
591 : : }
592 : : }
593 : : prev_pg_sz = 0;
594 : : n_other_pg_sz = 0;
595 [ + + ]: 2972 : for (i = 0; i < n_other_msls; i++) {
596 : 2376 : uint64_t pg_sz = other_msls[i]->page_sz;
597 : :
598 [ + + ]: 2376 : if (prev_pg_sz != pg_sz) {
599 : 594 : other_pg_sz[n_other_pg_sz++] = pg_sz;
600 : : prev_pg_sz = pg_sz;
601 : : }
602 : : }
603 : :
604 : : /* finally, try allocating memory of specified page sizes, starting from
605 : : * the smallest sizes
606 : : */
607 [ - + ]: 596 : for (i = 0; i < n_requested_pg_sz; i++) {
608 : 0 : uint64_t pg_sz = requested_pg_sz[i];
609 : :
610 : : /*
611 : : * do not pass the size hint here, as user expects other page
612 : : * sizes first, before resorting to best effort allocation.
613 : : */
614 [ # # ]: 0 : if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
615 : : align, bound, contig))
616 : : return 0;
617 : : }
618 [ + + ]: 596 : if (n_other_pg_sz == 0)
619 : : return -1;
620 : :
621 : : /* now, check if we can reserve anything with size hint */
622 : 594 : ret = find_suitable_element(heap, size, flags, align, bound, contig);
623 [ + + ]: 594 : if (ret != NULL)
624 : : return 0;
625 : :
626 : : /*
627 : : * we still couldn't reserve memory, so try expanding heap with other
628 : : * page sizes, if there are any
629 : : */
630 [ + - ]: 483 : for (i = 0; i < n_other_pg_sz; i++) {
631 : 483 : uint64_t pg_sz = other_pg_sz[i];
632 : :
633 [ - + ]: 483 : if (!try_expand_heap(heap, pg_sz, size, socket, flags,
634 : : align, bound, contig))
635 : : return 0;
636 : : }
637 : : return -1;
638 : : }
639 : :
640 : : /* this will try lower page sizes first */
641 : : static void *
642 : 117019 : malloc_heap_alloc_on_heap_id(size_t size, unsigned int heap_id, unsigned int flags, size_t align,
643 : : size_t bound, bool contig)
644 : : {
645 : 117019 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
646 : 117017 : struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
647 : 117017 : unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
648 : : int socket_id;
649 : : void *ret;
650 : : const struct internal_config *internal_conf =
651 : 117017 : eal_get_internal_configuration();
652 : :
653 : 117016 : rte_spinlock_lock(&(heap->lock));
654 : :
655 : : align = align == 0 ? 1 : align;
656 : :
657 : : /* for legacy mode, try once and with all flags */
658 [ + + ]: 117027 : if (internal_conf->legacy_mem) {
659 : 13495 : ret = heap_alloc(heap, size, flags, align, bound, contig);
660 : 13495 : goto alloc_unlock;
661 : : }
662 : :
663 : : /*
664 : : * we do not pass the size hint here, because even if allocation fails,
665 : : * we may still be able to allocate memory from appropriate page sizes,
666 : : * we just need to request more memory first.
667 : : */
668 : :
669 : 103532 : socket_id = rte_socket_id_by_idx(heap_id);
670 : : /*
671 : : * if socket ID is negative, we cannot find a socket ID for this heap -
672 : : * which means it's an external heap. those can have unexpected page
673 : : * sizes, so if the user asked to allocate from there - assume user
674 : : * knows what they're doing, and allow allocating from there with any
675 : : * page size flags.
676 : : */
677 [ + + ]: 103532 : if (socket_id < 0)
678 : 10 : size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
679 : :
680 : 103532 : ret = heap_alloc(heap, size, size_flags, align, bound, contig);
681 [ + + ]: 103532 : if (ret != NULL)
682 : 102936 : goto alloc_unlock;
683 : :
684 : : /* if socket ID is invalid, this is an external heap */
685 [ - + ]: 596 : if (socket_id < 0)
686 : 0 : goto alloc_unlock;
687 : :
688 [ + + ]: 596 : if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
689 : : bound, contig)) {
690 : 594 : ret = heap_alloc(heap, size, flags, align, bound, contig);
691 : :
692 : : /* this should have succeeded */
693 [ + - ]: 594 : if (ret == NULL)
694 : 0 : EAL_LOG(ERR, "Error allocating from heap");
695 : : }
696 : 117027 : alloc_unlock:
697 : : rte_spinlock_unlock(&(heap->lock));
698 : 117027 : return ret;
699 : : }
700 : :
701 : : static unsigned int
702 : 109199 : malloc_get_numa_socket(void)
703 : : {
704 : 109199 : const struct internal_config *conf = eal_get_internal_configuration();
705 : 109199 : unsigned int socket_id = rte_socket_id();
706 : : unsigned int idx;
707 : :
708 [ + + ]: 109198 : if (socket_id != (unsigned int)SOCKET_ID_ANY)
709 : : return socket_id;
710 : :
711 : : /* for control threads, return first socket where memory is available */
712 [ + + ]: 18 : for (idx = 0; idx < rte_socket_count(); idx++) {
713 : 12 : socket_id = rte_socket_id_by_idx(idx);
714 [ - + ]: 12 : if (conf->socket_mem[socket_id] != 0)
715 : 0 : return socket_id;
716 : : }
717 : : /* We couldn't quickly find a NUMA node where memory was available,
718 : : * so fall back to using main lcore socket ID.
719 : : */
720 : 6 : socket_id = rte_lcore_to_socket_id(rte_get_main_lcore());
721 : : /* Main lcore socket ID may be SOCKET_ID_ANY
722 : : * when main lcore thread is affinitized to multiple NUMA nodes.
723 : : */
724 [ - + ]: 6 : if (socket_id != (unsigned int)SOCKET_ID_ANY)
725 : : return socket_id;
726 : : /* Failed to find meaningful socket ID, so use the first one available. */
727 : 0 : return rte_socket_id_by_idx(0);
728 : : }
729 : :
730 : : void *
731 : 117053 : malloc_heap_alloc(size_t size, int socket_arg, unsigned int flags,
732 : : size_t align, size_t bound, bool contig)
733 : : {
734 : : int socket, heap_id, i;
735 : : void *ret;
736 : :
737 : : /* return NULL if size is 0 or alignment is not power-of-2 */
738 [ + - + - + - ]: 117053 : if (size == 0 || (align && !rte_is_power_of_2(align)))
739 : : return NULL;
740 : :
741 [ + + + + ]: 117053 : if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
742 : : socket_arg = SOCKET_ID_ANY;
743 : :
744 [ + + ]: 103561 : if (socket_arg == SOCKET_ID_ANY)
745 : 109199 : socket = malloc_get_numa_socket();
746 : : else
747 : : socket = socket_arg;
748 : :
749 : : /* turn socket ID into heap ID */
750 : 117048 : heap_id = malloc_socket_to_heap_id(socket);
751 : : /* if heap id is negative, socket ID was invalid */
752 [ + + ]: 117047 : if (heap_id < 0)
753 : : return NULL;
754 : :
755 : 117016 : ret = malloc_heap_alloc_on_heap_id(size, heap_id, flags, align, bound, contig);
756 [ + + ]: 117024 : if (ret != NULL || socket_arg != SOCKET_ID_ANY)
757 : : return ret;
758 : :
759 : : /* try other heaps. we are only iterating through native DPDK sockets,
760 : : * so external heaps won't be included.
761 : : */
762 [ + + ]: 9 : for (i = 0; i < (int) rte_socket_count(); i++) {
763 [ + + ]: 6 : if (i == heap_id)
764 : 3 : continue;
765 : 3 : ret = malloc_heap_alloc_on_heap_id(size, i, flags, align, bound, contig);
766 [ - + ]: 3 : if (ret != NULL)
767 : 0 : return ret;
768 : : }
769 : : return NULL;
770 : : }
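
      Only requests made with SOCKET_ID_ANY reach the other-heaps loop above; a request
      pinned to a specific socket returns NULL instead (in no-hugepage mode a native
      socket argument is treated as SOCKET_ID_ANY). A small sketch with an arbitrary
      4 KB size:

          #include <rte_malloc.h>

          /* may be served from any native DPDK heap */
          void *any = rte_malloc_socket(NULL, 4096, 0, SOCKET_ID_ANY);
          /* served from the socket 1 heap only, or NULL if it is exhausted */
          void *pinned = rte_malloc_socket(NULL, 4096, 0, 1);
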
771 : :
772 : : static void *
773 : 3 : heap_alloc_biggest_on_heap_id(unsigned int heap_id,
774 : : unsigned int flags, size_t align, bool contig)
775 : : {
776 : 3 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
777 : 3 : struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
778 : : void *ret;
779 : :
780 : 3 : rte_spinlock_lock(&(heap->lock));
781 : :
782 : : align = align == 0 ? 1 : align;
783 : :
784 : 3 : ret = heap_alloc_biggest(heap, flags, align, contig);
785 : :
786 : : rte_spinlock_unlock(&(heap->lock));
787 : :
788 : 3 : return ret;
789 : : }
790 : :
791 : : void *
792 : 3 : malloc_heap_alloc_biggest(int socket_arg, unsigned int flags, size_t align, bool contig)
793 : : {
794 : : int socket, i, cur_socket, heap_id;
795 : : void *ret;
796 : :
797 : : /* return NULL if align is not power-of-2 */
798 [ + - + - ]: 3 : if ((align && !rte_is_power_of_2(align)))
799 : : return NULL;
800 : :
801 [ + - ]: 3 : if (!rte_eal_has_hugepages())
802 : : socket_arg = SOCKET_ID_ANY;
803 : :
804 [ + + ]: 3 : if (socket_arg == SOCKET_ID_ANY)
805 : 1 : socket = malloc_get_numa_socket();
806 : : else
807 : : socket = socket_arg;
808 : :
809 : : /* turn socket ID into heap ID */
810 : 3 : heap_id = malloc_socket_to_heap_id(socket);
811 : : /* if heap id is negative, socket ID was invalid */
812 [ + - ]: 3 : if (heap_id < 0)
813 : : return NULL;
814 : :
815 : 3 : ret = heap_alloc_biggest_on_heap_id(heap_id, flags, align, contig);
816 [ - + ]: 3 : if (ret != NULL || socket_arg != SOCKET_ID_ANY)
817 : : return ret;
818 : :
819 : : /* try other heaps */
820 [ # # ]: 0 : for (i = 0; i < (int) rte_socket_count(); i++) {
821 : 0 : cur_socket = rte_socket_id_by_idx(i);
822 [ # # ]: 0 : if (cur_socket == socket)
823 : 0 : continue;
824 : 0 : ret = heap_alloc_biggest_on_heap_id(i, flags, align, contig);
825 [ # # ]: 0 : if (ret != NULL)
826 : 0 : return ret;
827 : : }
828 : : return NULL;
829 : : }
830 : :
831 : : /* this function is exposed in malloc_mp.h */
832 : : int
833 : 493 : malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
834 : : {
835 : : int n_segs, seg_idx, max_seg_idx;
836 : : struct rte_memseg_list *msl;
837 : : size_t page_sz;
838 : :
839 : 493 : msl = rte_mem_virt2memseg_list(aligned_start);
840 [ + - ]: 493 : if (msl == NULL)
841 : : return -1;
842 : :
843 : 493 : page_sz = (size_t)msl->page_sz;
844 : 493 : n_segs = aligned_len / page_sz;
845 : 493 : seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
846 : 493 : max_seg_idx = seg_idx + n_segs;
847 : :
848 [ + + ]: 1515 : for (; seg_idx < max_seg_idx; seg_idx++) {
849 : : struct rte_memseg *ms;
850 : :
851 : 1022 : ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
852 : 1022 : eal_memalloc_free_seg(ms);
853 : : }
854 : : return 0;
855 : : }
856 : :
857 : : int
858 : 108518 : malloc_heap_free(struct malloc_elem *elem)
859 : : {
860 : : struct malloc_heap *heap;
861 : : void *start, *aligned_start, *end, *aligned_end;
862 : : size_t len, aligned_len, page_sz;
863 : : struct rte_memseg_list *msl;
864 : : unsigned int i, n_segs, before_space, after_space;
865 : : int ret;
866 : : bool unmapped = false;
867 : : const struct internal_config *internal_conf =
868 : 108518 : eal_get_internal_configuration();
869 : :
870 [ + - + - ]: 108518 : if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
871 : : return -1;
872 : :
873 : : asan_clear_redzone(elem);
874 : :
875 : : /* elem may be merged with previous element, so keep heap address */
876 : 108518 : heap = elem->heap;
877 : 108518 : msl = elem->msl;
878 : 108518 : page_sz = (size_t)msl->page_sz;
879 : :
880 : 108518 : rte_spinlock_lock(&(heap->lock));
881 : :
882 : : void *asan_ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN + elem->pad);
883 : : size_t asan_data_len = elem->size - MALLOC_ELEM_OVERHEAD - elem->pad;
884 : :
885 : : /* mark element as free */
886 : 108518 : elem->state = ELEM_FREE;
887 : :
888 : 108518 : elem = malloc_elem_free(elem);
889 : :
890 : : /* anything after this is a bonus */
891 : : ret = 0;
892 : :
893 : : /* ...of which we can't avail if we are in legacy mode, or if this is an
894 : : * externally allocated segment.
895 : : */
896 [ + + + + ]: 108518 : if (internal_conf->legacy_mem || (msl->external > 0))
897 : 5166 : goto free_unlock;
898 : :
899 : : /* check if we can free any memory back to the system */
900 [ + + ]: 103352 : if (elem->size < page_sz)
901 : 102841 : goto free_unlock;
902 : :
903 : : /* if user requested to match allocations, the sizes must match - if not,
904 : : * we will defer freeing these hugepages until the entire original allocation
905 : : * can be freed
906 : : */
907 [ - + - - ]: 511 : if (internal_conf->match_allocations && elem->size != elem->orig_size)
908 : 0 : goto free_unlock;
909 : :
910 : : /* probably, but let's make sure, as we may not be using up full page */
911 : : start = elem;
912 : : len = elem->size;
913 : 511 : aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
914 : 511 : end = RTE_PTR_ADD(elem, len);
915 : 511 : aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);
916 : :
917 : 511 : aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
918 : :
919 : : /* can't free anything */
920 [ + + ]: 511 : if (aligned_len < page_sz)
921 : 7 : goto free_unlock;
922 : :
923 : : /* we can free something. however, some of these pages may be marked as
924 : : * unfreeable, so also check that as well
925 : : */
926 : 504 : n_segs = aligned_len / page_sz;
927 [ + + ]: 1615 : for (i = 0; i < n_segs; i++) {
928 : : const struct rte_memseg *tmp =
929 : 1111 : rte_mem_virt2memseg(aligned_start, msl);
930 : :
931 [ + + ]: 1111 : if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
932 : : /* this is an unfreeable segment, so move start */
933 : 89 : aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
934 : : }
935 : : }
936 : :
937 : : /* recalculate length and number of segments */
938 : 504 : aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
939 : 504 : n_segs = aligned_len / page_sz;
940 : :
941 : : /* check if we can still free some pages */
942 [ + + ]: 504 : if (n_segs == 0)
943 : 11 : goto free_unlock;
944 : :
945 : : /* We're not done yet. We also have to check if by freeing space we will
946 : : * be leaving free elements that are too small to store new elements.
947 : : * Check if we have enough space in the beginning and at the end, or if
948 : : * start/end are exactly page aligned.
949 : : */
950 : 493 : before_space = RTE_PTR_DIFF(aligned_start, elem);
951 : 493 : after_space = RTE_PTR_DIFF(end, aligned_end);
952 [ + + - + ]: 493 : if (before_space != 0 &&
953 : : before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
954 : : /* There is not enough space before start, but we may be able to
955 : : * move the start forward by one page.
956 : : */
957 [ # # ]: 0 : if (n_segs == 1)
958 : 0 : goto free_unlock;
959 : :
960 : : /* move start */
961 : 0 : aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
962 : 0 : aligned_len -= page_sz;
963 : 0 : n_segs--;
964 : : }
965 [ + + - + ]: 493 : if (after_space != 0 && after_space <
966 : : MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
967 : : /* There is not enough space after end, but we may be able to
968 : : * move the end backwards by one page.
969 : : */
970 [ # # ]: 0 : if (n_segs == 1)
971 : 0 : goto free_unlock;
972 : :
973 : : /* move end */
974 : : aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
975 : 0 : aligned_len -= page_sz;
976 : : n_segs--;
977 : : }
978 : :
979 : : /* now we can finally free us some pages */
980 : :
981 : 493 : rte_mcfg_mem_write_lock();
982 : :
983 : : /*
984 : : * we allow secondary processes to clear the heap of this allocated
985 : : * memory because it is safe to do so, as even if notifications about
986 : : * unmapped pages don't make it to other processes, heap is shared
987 : : * across all processes, and will become empty of this memory anyway,
988 : : * and nothing can allocate it back unless primary process will be able
989 : : * to deliver allocation message to every single running process.
990 : : */
991 : :
992 : 493 : malloc_elem_free_list_remove(elem);
993 : :
994 : 493 : malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);
995 : :
996 : 493 : heap->total_size -= aligned_len;
997 : :
998 [ + - ]: 493 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
999 : : /* notify user about changes in memory map */
1000 : 493 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
1001 : : aligned_start, aligned_len);
1002 : :
1003 : : /* don't care if any of this fails */
1004 : 493 : malloc_heap_free_pages(aligned_start, aligned_len);
1005 : :
1006 : 493 : request_sync();
1007 : : } else {
1008 : : struct malloc_mp_req req;
1009 : :
1010 : : memset(&req, 0, sizeof(req));
1011 : :
1012 : 0 : req.t = REQ_TYPE_FREE;
1013 : 0 : req.free_req.addr = aligned_start;
1014 : 0 : req.free_req.len = aligned_len;
1015 : :
1016 : : /*
1017 : : * we request primary to deallocate pages, but we don't do it
1018 : : * in this thread. instead, we notify primary that we would like
1019 : : * to deallocate pages, and this process will receive another
1020 : : * request (in parallel) that will do it for us on another
1021 : : * thread.
1022 : : *
1023 : : * we also don't really care if this succeeds - the data is
1024 : : * already removed from the heap, so it is, for all intents and
1025 : : * purposes, hidden from the rest of DPDK even if some other
1026 : : * process (including this one) may have these pages mapped.
1027 : : *
1028 : : * notifications about deallocated memory happen during sync.
1029 : : */
1030 : 0 : request_to_primary(&req);
1031 : : }
1032 : :
1033 : : /* we didn't exit early, meaning we have unmapped some pages */
1034 : : unmapped = true;
1035 : :
1036 : 493 : EAL_LOG(DEBUG, "Heap on socket %d was shrunk by %zdMB",
1037 : : msl->socket_id, aligned_len >> 20ULL);
1038 : :
1039 : 493 : rte_mcfg_mem_write_unlock();
1040 : 108518 : free_unlock:
1041 : : asan_set_freezone(asan_ptr, asan_data_len);
1042 : :
1043 : : /* if we unmapped some memory, we need to do additional work for ASan */
1044 : : if (unmapped) {
1045 : : void *asan_end = RTE_PTR_ADD(asan_ptr, asan_data_len);
1046 : : void *aligned_end = RTE_PTR_ADD(aligned_start, aligned_len);
1047 : : void *aligned_trailer = RTE_PTR_SUB(aligned_start,
1048 : : MALLOC_ELEM_TRAILER_LEN);
1049 : :
1050 : : /*
1051 : : * There was a memory area that was unmapped. This memory area
1052 : : * will have to be marked as available for ASan, because we will
1053 : : * want to use it next time it gets mapped again. The OS memory
1054 : : * protection should trigger a fault on access to these areas
1055 : : * anyway, so we are not giving up any protection.
1056 : : */
1057 : : asan_set_zone(aligned_start, aligned_len, 0x00);
1058 : :
1059 : : /*
1060 : : * ...however, when we unmap pages, we create new free elements
1061 : : * which might have been marked as "freed" with an earlier
1062 : : * `asan_set_freezone` call. So, if there is an area past the
1063 : : * unmapped space that was marked as freezone for ASan, we need
1064 : : * to mark the malloc header as available.
1065 : : */
1066 : : if (asan_end > aligned_end)
1067 : : asan_set_zone(aligned_end, MALLOC_ELEM_HEADER_LEN, 0x00);
1068 : :
1069 : : /* if there's space before unmapped memory, mark as available */
1070 : : if (asan_ptr < aligned_start)
1071 : : asan_set_zone(aligned_trailer, MALLOC_ELEM_TRAILER_LEN, 0x00);
1072 : : }
1073 : :
1074 : : rte_spinlock_unlock(&(heap->lock));
1075 : 108518 : return ret;
1076 : : }
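
      To make the start/end trimming above concrete, a worked sketch in comment form,
      assuming 2 MB pages and invented addresses:

          /*
           * free element spans 0x100100000 .. 0x100500000 (4 MB)
           * aligned_start = RTE_PTR_ALIGN_CEIL(start, 2 MB)  = 0x100200000
           * aligned_end   = RTE_PTR_ALIGN_FLOOR(end,  2 MB)  = 0x100400000
           * aligned_len   = 2 MB -> exactly one page can be unmapped
           *
           * the 1 MB left before aligned_start and after aligned_end is large
           * enough to hold free elements, so neither boundary has to be moved
           * by a further page before the unmap.
           */
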
1077 : :
1078 : : int
1079 : 17 : malloc_heap_resize(struct malloc_elem *elem, size_t size)
1080 : : {
1081 : : int ret;
1082 : :
1083 [ + - + - ]: 17 : if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
1084 : : return -1;
1085 : :
1086 : 17 : rte_spinlock_lock(&(elem->heap->lock));
1087 : :
1088 : 17 : ret = malloc_elem_resize(elem, size);
1089 : :
1090 : 17 : rte_spinlock_unlock(&(elem->heap->lock));
1091 : :
1092 : 17 : return ret;
1093 : : }
1094 : :
1095 : : /*
1096 : : * Function to retrieve data for a given heap
1097 : : */
1098 : : int
1099 : 110 : malloc_heap_get_stats(struct malloc_heap *heap,
1100 : : struct rte_malloc_socket_stats *socket_stats)
1101 : : {
1102 : : size_t idx;
1103 : : struct malloc_elem *elem;
1104 : :
1105 : 110 : rte_spinlock_lock(&heap->lock);
1106 : :
1107 : : /* Initialise variables for heap */
1108 : 110 : socket_stats->free_count = 0;
1109 : 110 : socket_stats->heap_freesz_bytes = 0;
1110 : 110 : socket_stats->greatest_free_size = 0;
1111 : :
1112 : : /* Iterate through free list */
1113 [ + + ]: 1540 : for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
1114 : 1430 : for (elem = LIST_FIRST(&heap->free_head[idx]);
1115 [ + + ]: 1444 : !!elem; elem = LIST_NEXT(elem, free_list))
1116 : : {
1117 : 14 : socket_stats->free_count++;
1118 : 14 : socket_stats->heap_freesz_bytes += elem->size;
1119 [ + - ]: 14 : if (elem->size > socket_stats->greatest_free_size)
1120 : 14 : socket_stats->greatest_free_size = elem->size;
1121 : : }
1122 : : }
1123 : : /* Get stats on overall heap and allocated memory on this heap */
1124 : 110 : socket_stats->heap_totalsz_bytes = heap->total_size;
1125 : 110 : socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
1126 : 110 : socket_stats->heap_freesz_bytes);
1127 : 110 : socket_stats->alloc_count = heap->alloc_count;
1128 : :
1129 : : rte_spinlock_unlock(&heap->lock);
1130 : 110 : return 0;
1131 : : }
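
      These per-heap counters are exposed through the public stats API. A minimal sketch
      for the default heap of socket 0:

          #include <stdio.h>
          #include <rte_malloc.h>

          struct rte_malloc_socket_stats stats;

          if (rte_malloc_get_socket_stats(0, &stats) == 0)
                  printf("socket 0: %zu bytes free in %u elements\n",
                                  stats.heap_freesz_bytes, stats.free_count);
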
1132 : :
1133 : : /*
1134 : : * Function to retrieve data for a given heap
1135 : : */
1136 : : void
1137 : 0 : malloc_heap_dump(struct malloc_heap *heap, FILE *f)
1138 : : {
1139 : : struct malloc_elem *elem;
1140 : :
1141 : 0 : rte_spinlock_lock(&heap->lock);
1142 : :
1143 : 0 : fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
1144 : 0 : fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
1145 : :
1146 : 0 : elem = heap->first;
1147 [ # # ]: 0 : while (elem) {
1148 : 0 : malloc_elem_dump(elem, f);
1149 : 0 : elem = elem->next;
1150 : : }
1151 : :
1152 : : rte_spinlock_unlock(&heap->lock);
1153 : 0 : }
1154 : :
1155 : : static int
1156 : 1 : destroy_elem(struct malloc_elem *elem, size_t len)
1157 : : {
1158 : 1 : struct malloc_heap *heap = elem->heap;
1159 : :
1160 : : /* notify all subscribers that a memory area is going to be removed */
1161 : 1 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);
1162 : :
1163 : : /* this element can be removed */
1164 : 1 : malloc_elem_free_list_remove(elem);
1165 : 1 : malloc_elem_hide_region(elem, elem, len);
1166 : :
1167 : 1 : heap->total_size -= len;
1168 : :
1169 : : memset(elem, 0, sizeof(*elem));
1170 : :
1171 : 1 : return 0;
1172 : : }
1173 : :
1174 : : struct rte_memseg_list *
1175 : 1 : malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
1176 : : unsigned int n_pages, size_t page_sz, const char *seg_name,
1177 : : unsigned int socket_id)
1178 : : {
1179 : 1 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1180 : : char fbarray_name[RTE_FBARRAY_NAME_LEN];
1181 : : struct rte_memseg_list *msl = NULL;
1182 : : struct rte_fbarray *arr;
1183 : 1 : size_t seg_len = n_pages * page_sz;
1184 : : unsigned int i;
1185 : :
1186 : : /* first, find a free memseg list */
1187 [ + - ]: 9 : for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
1188 : 9 : struct rte_memseg_list *tmp = &mcfg->memsegs[i];
1189 [ + + ]: 9 : if (tmp->base_va == NULL) {
1190 : : msl = tmp;
1191 : : break;
1192 : : }
1193 : : }
1194 [ - + ]: 1 : if (msl == NULL) {
1195 : 0 : EAL_LOG(ERR, "Couldn't find empty memseg list");
1196 : 0 : rte_errno = ENOSPC;
1197 : 0 : return NULL;
1198 : : }
1199 : :
1200 : : snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p",
1201 : : seg_name, va_addr);
1202 : :
1203 : : /* create the backing fbarray */
1204 [ - + ]: 1 : if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
1205 : : sizeof(struct rte_memseg)) < 0) {
1206 : 0 : EAL_LOG(ERR, "Couldn't create fbarray backing the memseg list");
1207 : 0 : return NULL;
1208 : : }
1209 : : arr = &msl->memseg_arr;
1210 : :
1211 : : /* fbarray created, fill it up */
1212 [ + + ]: 3 : for (i = 0; i < n_pages; i++) {
1213 : : struct rte_memseg *ms;
1214 : :
1215 : 2 : rte_fbarray_set_used(arr, i);
1216 : 2 : ms = rte_fbarray_get(arr, i);
1217 : 2 : ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
1218 [ - + ]: 2 : ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
1219 : 2 : ms->hugepage_sz = page_sz;
1220 : 2 : ms->len = page_sz;
1221 : 2 : ms->nchannel = rte_memory_get_nchannel();
1222 : 2 : ms->nrank = rte_memory_get_nrank();
1223 : 2 : ms->socket_id = socket_id;
1224 : : }
1225 : :
1226 : : /* set up the memseg list */
1227 : 1 : msl->base_va = va_addr;
1228 : 1 : msl->page_sz = page_sz;
1229 : 1 : msl->socket_id = socket_id;
1230 : 1 : msl->len = seg_len;
1231 : 1 : msl->version = 0;
1232 : 1 : msl->external = 1;
1233 : :
1234 : 1 : return msl;
1235 : : }
1236 : :
1237 : : struct extseg_walk_arg {
1238 : : void *va_addr;
1239 : : size_t len;
1240 : : struct rte_memseg_list *msl;
1241 : : };
1242 : :
1243 : : static int
1244 : 9 : extseg_walk(const struct rte_memseg_list *msl, void *arg)
1245 : : {
1246 : 9 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1247 : : struct extseg_walk_arg *wa = arg;
1248 : :
1249 [ + + + - ]: 9 : if (msl->base_va == wa->va_addr && msl->len == wa->len) {
1250 : : unsigned int found_idx;
1251 : :
1252 : : /* msl is const */
1253 : 1 : found_idx = msl - mcfg->memsegs;
1254 : 1 : wa->msl = &mcfg->memsegs[found_idx];
1255 : 1 : return 1;
1256 : : }
1257 : : return 0;
1258 : : }
1259 : :
1260 : : struct rte_memseg_list *
1261 : 1 : malloc_heap_find_external_seg(void *va_addr, size_t len)
1262 : : {
1263 : : struct extseg_walk_arg wa;
1264 : : int res;
1265 : :
1266 : 1 : wa.va_addr = va_addr;
1267 : 1 : wa.len = len;
1268 : :
1269 : 1 : res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa);
1270 : :
1271 [ - + ]: 1 : if (res != 1) {
1272 : : /* 0 means nothing was found, -1 shouldn't happen */
1273 [ # # ]: 0 : if (res == 0)
1274 : 0 : rte_errno = ENOENT;
1275 : 0 : return NULL;
1276 : : }
1277 : 1 : return wa.msl;
1278 : : }
1279 : :
1280 : : int
1281 : 1 : malloc_heap_destroy_external_seg(struct rte_memseg_list *msl)
1282 : : {
1283 : : /* destroy the fbarray backing this memory */
1284 [ + - ]: 1 : if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
1285 : : return -1;
1286 : :
1287 : : /* reset the memseg list */
1288 : : memset(msl, 0, sizeof(*msl));
1289 : :
1290 : 1 : return 0;
1291 : : }
1292 : :
1293 : : int
1294 : 1 : malloc_heap_add_external_memory(struct malloc_heap *heap,
1295 : : struct rte_memseg_list *msl)
1296 : : {
1297 : : /* erase contents of new memory */
1298 : 1 : memset(msl->base_va, 0, msl->len);
1299 : :
1300 : : /* now, add newly minted memory to the malloc heap */
1301 : 1 : malloc_heap_add_memory(heap, msl, msl->base_va, msl->len, false);
1302 : :
1303 : 1 : heap->total_size += msl->len;
1304 : :
1305 : : /* all done! */
1306 : 1 : EAL_LOG(DEBUG, "Added segment for heap %s starting at %p",
1307 : : heap->name, msl->base_va);
1308 : :
1309 : : /* notify all subscribers that a new memory area has been added */
1310 : 1 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
1311 : 1 : msl->base_va, msl->len);
1312 : :
1313 : 1 : return 0;
1314 : : }
1315 : :
1316 : : int
1317 : 1 : malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
1318 : : size_t len)
1319 : : {
1320 : 1 : struct malloc_elem *elem = heap->first;
1321 : :
1322 : : /* find element with specified va address */
1323 [ - + ]: 1 : while (elem != NULL && elem != va_addr) {
1324 : 0 : elem = elem->next;
1325 : : /* stop if we've blown past our VA */
1326 [ # # ]: 0 : if (elem > (struct malloc_elem *)va_addr) {
1327 : 0 : rte_errno = ENOENT;
1328 : 0 : return -1;
1329 : : }
1330 : : }
1331 : : /* check if element was found */
1332 [ + - - + ]: 1 : if (elem == NULL || elem->msl->len != len) {
1333 : 0 : rte_errno = ENOENT;
1334 : 0 : return -1;
1335 : : }
1336 : : /* if element's size is not equal to segment len, segment is busy */
1337 [ + - - + ]: 1 : if (elem->state == ELEM_BUSY || elem->size != len) {
1338 : 0 : rte_errno = EBUSY;
1339 : 0 : return -1;
1340 : : }
1341 : 1 : return destroy_elem(elem, len);
1342 : : }
1343 : :
1344 : : int
1345 : 1 : malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
1346 : : {
1347 : 1 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1348 : 1 : uint32_t next_socket_id = mcfg->next_socket_id;
1349 : :
1350 : : /* prevent overflow. did you really create 2 billion heaps??? */
1351 [ - + ]: 1 : if (next_socket_id > INT32_MAX) {
1352 : 0 : EAL_LOG(ERR, "Cannot assign new socket ID's");
1353 : 0 : rte_errno = ENOSPC;
1354 : 0 : return -1;
1355 : : }
1356 : :
1357 : : /* initialize empty heap */
1358 : 1 : heap->alloc_count = 0;
1359 : 1 : heap->first = NULL;
1360 : 1 : heap->last = NULL;
1361 : 1 : LIST_INIT(heap->free_head);
1362 : : rte_spinlock_init(&heap->lock);
1363 : 1 : heap->total_size = 0;
1364 : 1 : heap->socket_id = next_socket_id;
1365 : :
1366 : : /* we hold a global mem hotplug writelock, so it's safe to increment */
1367 : 1 : mcfg->next_socket_id++;
1368 : :
1369 : : /* set up name */
1370 : 1 : strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
1371 : 1 : return 0;
1372 : : }
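
      malloc_heap_create() and the external-segment helpers above back the public
      rte_malloc_heap_* API. A hedged sketch of the usual sequence; the heap name, the
      page count and the page-aligned buffer ext_va are assumptions for illustration:

          #include <rte_malloc.h>
          #include <rte_memory.h>

          /* ext_va: 16 pages of 2 MB, externally obtained and page-aligned (assumed) */
          if (rte_malloc_heap_create("user_heap") == 0 &&
                          rte_malloc_heap_memory_add("user_heap", ext_va, NULL,
                                          16, RTE_PGSIZE_2M) == 0) {
                  int sock = rte_malloc_heap_get_socket("user_heap");
                  void *obj = rte_malloc_socket(NULL, 4096, 0, sock);
                  /* ... use obj, rte_free() it, then tear down with
                   * rte_malloc_heap_memory_remove() and rte_malloc_heap_destroy() */
          }
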
1373 : :
1374 : : int
1375 : 1 : malloc_heap_destroy(struct malloc_heap *heap)
1376 : : {
1377 [ - + ]: 1 : if (heap->alloc_count != 0) {
1378 : 0 : EAL_LOG(ERR, "Heap is still in use");
1379 : 0 : rte_errno = EBUSY;
1380 : 0 : return -1;
1381 : : }
1382 [ + - - + ]: 1 : if (heap->first != NULL || heap->last != NULL) {
1383 : 0 : EAL_LOG(ERR, "Heap still contains memory segments");
1384 : 0 : rte_errno = EBUSY;
1385 : 0 : return -1;
1386 : : }
1387 [ - + ]: 1 : if (heap->total_size != 0)
1388 : 0 : EAL_LOG(ERR, "Total size not zero, heap is likely corrupt");
1389 : :
1391 : : /* Reset all of the heap but the (held) lock so the caller can release it. */
1391 : : RTE_BUILD_BUG_ON(offsetof(struct malloc_heap, lock) != 0);
1392 : 1 : memset(RTE_PTR_ADD(heap, sizeof(heap->lock)), 0,
1393 : : sizeof(*heap) - sizeof(heap->lock));
1394 : :
1395 : 1 : return 0;
1396 : : }
1397 : :
1398 : : int
1399 : 180 : rte_eal_malloc_heap_init(void)
1400 : : {
1401 : 180 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1402 : : unsigned int i;
1403 : : const struct internal_config *internal_conf =
1404 : 180 : eal_get_internal_configuration();
1405 : :
1406 [ - + ]: 180 : if (internal_conf->match_allocations)
1407 : 0 : EAL_LOG(DEBUG, "Hugepages will be freed exactly as allocated.");
1408 : :
1409 [ + + ]: 180 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1410 : : /* assign min socket ID to external heaps */
1411 : 155 : mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
1412 : :
1413 : : /* assign names to default DPDK heaps */
1414 [ + + ]: 465 : for (i = 0; i < rte_socket_count(); i++) {
1415 : : struct malloc_heap *heap = &mcfg->malloc_heaps[i];
1416 : : char heap_name[RTE_HEAP_NAME_MAX_LEN];
1417 : 310 : int socket_id = rte_socket_id_by_idx(i);
1418 : :
1419 : : snprintf(heap_name, sizeof(heap_name),
1420 : : "socket_%i", socket_id);
1421 : 310 : strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
1422 : 310 : heap->socket_id = socket_id;
1423 : : }
1424 : : }
1425 : :
1426 [ - + ]: 180 : if (register_mp_requests()) {
1427 : 0 : EAL_LOG(ERR, "Couldn't register malloc multiprocess actions");
1428 : 0 : return -1;
1429 : : }
1430 : :
1431 : : return 0;
1432 : : }
1433 : :
1434 : 180 : int rte_eal_malloc_heap_populate(void)
1435 : : {
1436 : : /* mem hotplug is unlocked here. it's safe for primary as no requests can
1437 : : * even come before primary itself is fully initialized, and secondaries
1438 : : * do not need to initialize the heap.
1439 : : */
1440 : :
1441 : : /* secondary process does not need to initialize anything */
1442 [ + + ]: 180 : if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1443 : : return 0;
1444 : :
1445 : : /* add all IOVA-contiguous areas to the heap */
1446 : 155 : return rte_memseg_contig_walk(malloc_add_seg, NULL);
1447 : : }
1448 : :
1449 : : void
1450 : 252 : rte_eal_malloc_heap_cleanup(void)
1451 : : {
1452 : 252 : unregister_mp_requests();
1453 : 252 : }