Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright(c) 2010-2014 Intel Corporation
3 : : */
4 : : #include <stdint.h>
5 : : #include <stddef.h>
6 : : #include <stdlib.h>
7 : : #include <stdio.h>
8 : : #include <errno.h>
9 : : #include <sys/queue.h>
10 : :
11 : : #include <rte_memory.h>
12 : : #include <rte_errno.h>
13 : : #include <rte_eal.h>
14 : : #include <rte_eal_memconfig.h>
15 : : #include <rte_lcore.h>
16 : : #include <rte_common.h>
17 : : #include <rte_string_fns.h>
18 : : #include <rte_spinlock.h>
19 : : #include <rte_memzone.h>
20 : : #include <rte_fbarray.h>
21 : :
22 : : #include "eal_internal_cfg.h"
23 : : #include "eal_memalloc.h"
24 : : #include "eal_memcfg.h"
25 : : #include "eal_private.h"
26 : : #include "malloc_elem.h"
27 : : #include "malloc_heap.h"
28 : : #include "malloc_mp.h"
29 : :
30 : : /* start external socket IDs at a very high number */
31 : : #define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */
32 : : #define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))
33 : :
34 : : static unsigned
35 : 118180 : check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
36 : : {
37 : : unsigned check_flag = 0;
38 : :
39 [ + + ]: 118180 : if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
40 : : return 1;
41 : :
42 [ - + - - - - - - + ]: 1412 : switch (hugepage_sz) {
43 : 0 : case RTE_PGSIZE_256K:
44 : : check_flag = RTE_MEMZONE_256KB;
45 : 0 : break;
46 : 1281 : case RTE_PGSIZE_2M:
47 : : check_flag = RTE_MEMZONE_2MB;
48 : 1281 : break;
49 : 0 : case RTE_PGSIZE_16M:
50 : : check_flag = RTE_MEMZONE_16MB;
51 : 0 : break;
52 : 0 : case RTE_PGSIZE_256M:
53 : : check_flag = RTE_MEMZONE_256MB;
54 : 0 : break;
55 : 0 : case RTE_PGSIZE_512M:
56 : : check_flag = RTE_MEMZONE_512MB;
57 : 0 : break;
58 : 0 : case RTE_PGSIZE_1G:
59 : : check_flag = RTE_MEMZONE_1GB;
60 : 0 : break;
61 : 0 : case RTE_PGSIZE_4G:
62 : : check_flag = RTE_MEMZONE_4GB;
63 : 0 : break;
64 : 0 : case RTE_PGSIZE_16G:
65 : : check_flag = RTE_MEMZONE_16GB;
66 : : }
67 : :
68 : 1412 : return check_flag & flags;
69 : : }
70 : :
71 : : int
72 : 116718 : malloc_socket_to_heap_id(unsigned int socket_id)
73 : : {
74 : 116718 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
75 : : int i;
76 : :
77 [ + + ]: 117762 : for (i = 0; i < RTE_MAX_HEAPS; i++) {
78 : : struct malloc_heap *heap = &mcfg->malloc_heaps[i];
79 : :
80 [ + + ]: 117731 : if (heap->socket_id == socket_id)
81 : 116685 : return i;
82 : : }
83 : : return -1;
84 : : }
85 : :
86 : : /*
87 : : * Expand the heap with a memory area.
88 : : */
89 : : static struct malloc_elem *
90 : 580 : malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
91 : : void *start, size_t len, bool dirty)
92 : : {
93 : : struct malloc_elem *elem = start;
94 : :
95 : 580 : malloc_elem_init(elem, heap, msl, len, elem, len, dirty);
96 : :
97 : 580 : malloc_elem_insert(elem);
98 : :
99 : 580 : elem = malloc_elem_join_adjacent_free(elem);
100 : :
101 : 580 : malloc_elem_free_list_insert(elem);
102 : :
103 : 580 : return elem;
104 : : }
105 : :
106 : : static int
107 : 115 : malloc_add_seg(const struct rte_memseg_list *msl,
108 : : const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
109 : : {
110 : 115 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
111 : : struct rte_memseg_list *found_msl;
112 : : struct malloc_heap *heap;
113 : : int msl_idx, heap_idx;
114 : :
115 [ + - ]: 115 : if (msl->external)
116 : : return 0;
117 : :
118 : 115 : heap_idx = malloc_socket_to_heap_id(msl->socket_id);
119 [ - + ]: 115 : if (heap_idx < 0) {
120 : 0 : EAL_LOG(ERR, "Memseg list has invalid socket id");
121 : 0 : return -1;
122 : : }
123 : 115 : heap = &mcfg->malloc_heaps[heap_idx];
124 : :
125 : : /* msl is const, so find it */
126 : 115 : msl_idx = msl - mcfg->memsegs;
127 : :
128 [ + - ]: 115 : if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
129 : : return -1;
130 : :
131 : 115 : found_msl = &mcfg->memsegs[msl_idx];
132 : :
133 : 115 : malloc_heap_add_memory(heap, found_msl, ms->addr, len,
134 : 115 : ms->flags & RTE_MEMSEG_FLAG_DIRTY);
135 : :
136 : 115 : heap->total_size += len;
137 : :
138 : 115 : EAL_LOG(DEBUG, "Added %zuM to heap on socket %i", len >> 20,
139 : : msl->socket_id);
140 : 115 : return 0;
141 : : }
142 : :
143 : : /*
144 : : * Iterates through the freelist for a heap to find a free element
145 : : * which can store data of the required size and with the requested alignment.
146 : : * If size is 0, find the biggest available elem.
147 : : * Returns null on failure, or pointer to element on success.
148 : : */
149 : : static struct malloc_elem *
150 : 118178 : find_suitable_element(struct malloc_heap *heap, size_t size,
151 : : unsigned int flags, size_t align, size_t bound, bool contig)
152 : : {
153 : : size_t idx;
154 : : struct malloc_elem *elem, *alt_elem = NULL;
155 : :
156 : 118178 : for (idx = malloc_elem_free_list_index(size);
157 [ + + ]: 553715 : idx < RTE_HEAP_NUM_FREELISTS; idx++) {
158 : 552304 : for (elem = LIST_FIRST(&heap->free_head[idx]);
159 [ + + ]: 555035 : !!elem; elem = LIST_NEXT(elem, free_list)) {
160 [ + + ]: 119498 : if (malloc_elem_can_hold(elem, size, align, bound,
161 : : contig)) {
162 : 117632 : if (check_hugepage_sz(flags,
163 [ + + ]: 117632 : elem->msl->page_sz))
164 : 116767 : return elem;
165 [ + + ]: 865 : if (alt_elem == NULL)
166 : : alt_elem = elem;
167 : : }
168 : : }
169 : : }
170 : :
171 [ + + ]: 1411 : if (flags & RTE_MEMZONE_SIZE_HINT_ONLY)
172 : 391 : return alt_elem;
173 : :
174 : : return NULL;
175 : : }
176 : :
177 : : /*
178 : : * Iterates through the freelist for a heap to find a free element with the
179 : : * biggest size and requested alignment. Will also set size to whatever element
180 : : * size that was found.
181 : : * Returns null on failure, or pointer to element on success.
182 : : */
183 : : static struct malloc_elem *
184 : 3 : find_biggest_element(struct malloc_heap *heap, size_t *size,
185 : : unsigned int flags, size_t align, bool contig)
186 : : {
187 : : struct malloc_elem *elem, *max_elem = NULL;
188 : : size_t idx, max_size = 0;
189 : :
190 [ + + ]: 42 : for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
191 : 39 : for (elem = LIST_FIRST(&heap->free_head[idx]);
192 [ + + ]: 43 : !!elem; elem = LIST_NEXT(elem, free_list)) {
193 : : size_t cur_size;
194 [ + - ]: 4 : if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
195 : 4 : !check_hugepage_sz(flags,
196 [ - + ]: 4 : elem->msl->page_sz))
197 : 0 : continue;
198 [ - + ]: 4 : if (contig) {
199 : : cur_size =
200 : 0 : malloc_elem_find_max_iova_contig(elem,
201 : : align);
202 : : } else {
203 : 4 : void *data_start = RTE_PTR_ADD(elem,
204 : : MALLOC_ELEM_HEADER_LEN);
205 : 4 : void *data_end = RTE_PTR_ADD(elem, elem->size -
206 : : MALLOC_ELEM_TRAILER_LEN);
207 : 4 : void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
208 : : align);
209 : : /* check if aligned data start is beyond end */
210 [ - + ]: 4 : if (aligned >= data_end)
211 : 0 : continue;
212 : 4 : cur_size = RTE_PTR_DIFF(data_end, aligned);
213 : : }
214 [ + - ]: 4 : if (cur_size > max_size) {
215 : : max_size = cur_size;
216 : : max_elem = elem;
217 : : }
218 : : }
219 : : }
220 : :
221 : 3 : *size = max_size;
222 : 3 : return max_elem;
223 : : }
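/*
 * Illustrative sketch, not part of the instrumented file: the "biggest
 * element" lookup above is what backs zero-length memzone reservations,
 * which ask for the largest free block available on a socket. The memzone
 * name is hypothetical.
 */
#include <rte_memzone.h>

static const struct rte_memzone *
grab_largest(int socket)
{
	/* len == 0 requests the biggest available free element */
	return rte_memzone_reserve("largest_block", 0, socket, 0);
}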
224 : :
225 : : /*
226 : : * Main function to allocate a block of memory from the heap.
227 : : * It locks the free list, scans it, and adds a new memseg if the
228 : : * scan fails. Once the new memseg is added, it re-scans and should return
229 : : * the new element after releasing the lock.
230 : : */
231 : : static void *
232 : 117141 : heap_alloc(struct malloc_heap *heap, size_t size, unsigned int flags,
233 : : size_t align, size_t bound, bool contig)
234 : : {
235 : : struct malloc_elem *elem;
236 : : size_t user_size = size;
237 : :
238 : 117141 : size = RTE_CACHE_LINE_ROUNDUP(size);
239 : 117141 : align = RTE_CACHE_LINE_ROUNDUP(align);
240 : :
241 : : /* roundup might cause an overflow */
242 [ + - ]: 117141 : if (size == 0)
243 : : return NULL;
244 : 117141 : elem = find_suitable_element(heap, size, flags, align, bound, contig);
245 [ + + ]: 117141 : if (elem != NULL) {
246 : 116561 : elem = malloc_elem_alloc(elem, size, align, bound, contig);
247 : :
248 : : /* increase heap's count of allocated elements */
249 : 116561 : heap->alloc_count++;
250 : :
251 : : asan_set_redzone(elem, user_size);
252 : : }
253 : :
254 [ + + ]: 117141 : return elem == NULL ? NULL : (void *)(&elem[1]);
255 : : }
256 : :
257 : : static void *
258 : 3 : heap_alloc_biggest(struct malloc_heap *heap, unsigned int flags, size_t align, bool contig)
259 : : {
260 : : struct malloc_elem *elem;
261 : : size_t size;
262 : :
263 : 3 : align = RTE_CACHE_LINE_ROUNDUP(align);
264 : :
265 : 3 : elem = find_biggest_element(heap, &size, flags, align, contig);
266 [ + - ]: 3 : if (elem != NULL) {
267 : 3 : elem = malloc_elem_alloc(elem, size, align, 0, contig);
268 : :
269 : : /* increase heap's count of allocated elements */
270 : 3 : heap->alloc_count++;
271 : :
272 : : asan_set_redzone(elem, size);
273 : : }
274 : :
275 [ + - ]: 3 : return elem == NULL ? NULL : (void *)(&elem[1]);
276 : : }
277 : :
278 : : /* this function is exposed in malloc_mp.h */
279 : : void
280 : 0 : rollback_expand_heap(struct rte_memseg **ms, int n_segs,
281 : : struct malloc_elem *elem, void *map_addr, size_t map_len)
282 : : {
283 [ # # ]: 0 : if (elem != NULL) {
284 : 0 : malloc_elem_free_list_remove(elem);
285 : 0 : malloc_elem_hide_region(elem, map_addr, map_len);
286 : : }
287 : :
288 : 0 : eal_memalloc_free_seg_bulk(ms, n_segs);
289 : 0 : }
290 : :
291 : : /* this function is exposed in malloc_mp.h */
292 : : struct malloc_elem *
293 : 463 : alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
294 : : int socket, unsigned int flags, size_t align, size_t bound,
295 : : bool contig, struct rte_memseg **ms, int n_segs)
296 : : {
297 : 463 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
298 : : struct rte_memseg_list *msl;
299 : : struct malloc_elem *elem = NULL;
300 : : size_t alloc_sz;
301 : : int allocd_pages, i;
302 : : bool dirty = false;
303 : : void *ret, *map_addr;
304 : :
305 : 463 : alloc_sz = (size_t)pg_sz * n_segs;
306 : :
307 : : /* first, check if we're allowed to allocate this memory */
308 [ - + ]: 463 : if (eal_memalloc_mem_alloc_validate(socket,
309 : 463 : heap->total_size + alloc_sz) < 0) {
310 : 0 : EAL_LOG(DEBUG, "User has disallowed allocation");
311 : 0 : return NULL;
312 : : }
313 : :
314 : 463 : allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
315 : : socket, true);
316 : :
317 : : /* make sure we've allocated our pages... */
318 [ + - ]: 463 : if (allocd_pages < 0)
319 : : return NULL;
320 : :
321 : 463 : map_addr = ms[0]->addr;
322 : 463 : msl = rte_mem_virt2memseg_list(map_addr);
323 : :
324 : : /* check if we wanted contiguous memory but didn't get it */
325 [ - + - - ]: 463 : if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
326 : 0 : EAL_LOG(DEBUG, "%s(): couldn't allocate physically contiguous space",
327 : : __func__);
328 : 0 : goto fail;
329 : : }
330 : :
331 : : /*
332 : : * Once we have all the memseg lists configured, if there is a dma mask
333 : : * set, check iova addresses are not out of range. Otherwise the device
334 : : * setting the dma mask could have problems with the mapped memory.
335 : : *
336 : : * There are two situations when this can happen:
337 : : * 1) memory initialization
338 : : * 2) dynamic memory allocation
339 : : *
340 : : * For 1), an error when checking the DMA mask implies the app cannot be
341 : : * executed. For 2), it implies the new memory cannot be added.
342 : : */
343 [ - + - - ]: 463 : if (mcfg->dma_maskbits &&
344 : 0 : rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
345 : : /*
346 : : * Currently this can only happen if IOMMU is enabled
347 : : * and the address width supported by the IOMMU hw is
348 : : * not enough for using the memory mapped IOVAs.
349 : : *
350 : : * If IOVA is VA, advise trying '--iova-mode pa',
351 : : * which could solve some situations when IOVA VA is not
352 : : * really needed.
353 : : */
354 : 0 : EAL_LOG(ERR,
355 : : "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask",
356 : : __func__);
357 : :
358 : : /*
359 : : * If IOVA is VA and it is possible to run with IOVA PA,
360 : : * because the user is root, give advice for solving the
361 : : * problem.
362 : : */
363 [ # # # # ]: 0 : if ((rte_eal_iova_mode() == RTE_IOVA_VA) &&
364 : 0 : rte_eal_using_phys_addrs())
365 : 0 : EAL_LOG(ERR,
366 : : "%s(): Please try initializing EAL with --iova-mode=pa parameter",
367 : : __func__);
368 : 0 : goto fail;
369 : : }
370 : :
371 : : /* Element is dirty if it contains at least one dirty page. */
372 [ + + ]: 1394 : for (i = 0; i < allocd_pages; i++)
373 : 931 : dirty |= ms[i]->flags & RTE_MEMSEG_FLAG_DIRTY;
374 : :
375 : : /* add newly minted memsegs to malloc heap */
376 : 463 : elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz, dirty);
377 : :
378 : : /* try once more, as now we have allocated new memory */
379 : 463 : ret = find_suitable_element(heap, elt_size, flags, align, bound,
380 : : contig);
381 : :
382 [ - + ]: 463 : if (ret == NULL)
383 : 0 : goto fail;
384 : :
385 : : return elem;
386 : :
387 : 0 : fail:
388 : 0 : rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
389 : 0 : return NULL;
390 : : }
391 : :
392 : : static int
393 : 462 : try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
394 : : size_t elt_size, int socket, unsigned int flags, size_t align,
395 : : size_t bound, bool contig)
396 : : {
397 : : struct malloc_elem *elem;
398 : : struct rte_memseg **ms;
399 : : void *map_addr;
400 : : size_t alloc_sz;
401 : : int n_segs;
402 : : bool callback_triggered = false;
403 : :
404 : 462 : alloc_sz = RTE_ALIGN_CEIL(RTE_ALIGN_CEIL(elt_size, align) +
405 : : MALLOC_ELEM_OVERHEAD, pg_sz);
406 : 462 : n_segs = alloc_sz / pg_sz;
407 : :
408 : : /* we can't know in advance how many pages we'll need, so we malloc */
409 : 462 : ms = malloc(sizeof(*ms) * n_segs);
410 [ + - ]: 462 : if (ms == NULL)
411 : : return -1;
412 : : memset(ms, 0, sizeof(*ms) * n_segs);
413 : :
414 : 462 : elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
415 : : bound, contig, ms, n_segs);
416 : :
417 [ - + ]: 462 : if (elem == NULL)
418 : 0 : goto free_ms;
419 : :
420 : 462 : map_addr = ms[0]->addr;
421 : :
422 : : /* notify user about changes in memory map */
423 : 462 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
424 : :
425 : : /* notify other processes that this has happened */
426 [ - + ]: 462 : if (request_sync()) {
427 : : /* we couldn't ensure all processes have mapped memory,
428 : : * so free it back and notify everyone that it's been
429 : : * freed back.
430 : : *
431 : : * technically, we could've avoided adding memory addresses to
432 : : * the map, but that would've led to inconsistent behavior
433 : : * between primary and secondary processes, as those get
434 : : * callbacks during sync. therefore, force primary process to
435 : : * do alloc-and-rollback syncs as well.
436 : : */
437 : : callback_triggered = true;
438 : 0 : goto free_elem;
439 : : }
440 : 462 : heap->total_size += alloc_sz;
441 : :
442 : 462 : EAL_LOG(DEBUG, "Heap on socket %d was expanded by %zdMB",
443 : : socket, alloc_sz >> 20ULL);
444 : :
445 : 462 : free(ms);
446 : :
447 : 462 : return 0;
448 : :
449 : : free_elem:
450 : : if (callback_triggered)
451 : 0 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
452 : : map_addr, alloc_sz);
453 : :
454 : 0 : rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
455 : :
456 : 0 : request_sync();
457 : 0 : free_ms:
458 : 0 : free(ms);
459 : :
460 : 0 : return -1;
461 : : }
462 : :
463 : : static int
464 : 1 : try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
465 : : size_t elt_size, int socket, unsigned int flags, size_t align,
466 : : size_t bound, bool contig)
467 : : {
468 : 1 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
469 : : struct malloc_mp_req req;
470 : : int req_result;
471 : :
472 : : memset(&req, 0, sizeof(req));
473 : :
474 : : req.t = REQ_TYPE_ALLOC;
475 : 1 : req.alloc_req.align = align;
476 : 1 : req.alloc_req.bound = bound;
477 : 1 : req.alloc_req.contig = contig;
478 : 1 : req.alloc_req.flags = flags;
479 : 1 : req.alloc_req.elt_size = elt_size;
480 : 1 : req.alloc_req.page_sz = pg_sz;
481 : 1 : req.alloc_req.socket = socket;
482 : 1 : req.alloc_req.malloc_heap_idx = heap - mcfg->malloc_heaps;
483 : :
484 : 1 : req_result = request_to_primary(&req);
485 : :
486 [ + - ]: 1 : if (req_result != 0)
487 : : return -1;
488 : :
489 [ - + ]: 1 : if (req.result != REQ_RESULT_SUCCESS)
490 : 0 : return -1;
491 : :
492 : : return 0;
493 : : }
494 : :
495 : : static int
496 : 463 : try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
497 : : int socket, unsigned int flags, size_t align, size_t bound,
498 : : bool contig)
499 : : {
500 : : int ret;
501 : :
502 : 463 : rte_mcfg_mem_write_lock();
503 : :
504 [ + + ]: 463 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
505 : 462 : ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
506 : : flags, align, bound, contig);
507 : : } else {
508 : 1 : ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
509 : : flags, align, bound, contig);
510 : : }
511 : :
512 : 463 : rte_mcfg_mem_write_unlock();
513 : 463 : return ret;
514 : : }
515 : :
516 : : static int
517 : 2296 : compare_pagesz(const void *a, const void *b)
518 : : {
519 : : const struct rte_memseg_list * const*mpa = a;
520 : : const struct rte_memseg_list * const*mpb = b;
521 : 2296 : const struct rte_memseg_list *msla = *mpa;
522 : 2296 : const struct rte_memseg_list *mslb = *mpb;
523 : 2296 : uint64_t pg_sz_a = msla->page_sz;
524 : 2296 : uint64_t pg_sz_b = mslb->page_sz;
525 : :
526 [ + - ]: 2296 : if (pg_sz_a < pg_sz_b)
527 : : return -1;
528 [ - + ]: 2296 : if (pg_sz_a > pg_sz_b)
529 : 0 : return 1;
530 : : return 0;
531 : : }
532 : :
533 : : static int
534 : 576 : alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
535 : : unsigned int flags, size_t align, size_t bound, bool contig)
536 : : {
537 : 576 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
538 : : struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
539 : : struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
540 : : uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
541 : : uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
542 : : uint64_t prev_pg_sz;
543 : : int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
544 : 576 : bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
545 : 576 : unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
546 : : void *ret;
547 : :
548 : : memset(requested_msls, 0, sizeof(requested_msls));
549 : : memset(other_msls, 0, sizeof(other_msls));
550 : : memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
551 : : memset(other_pg_sz, 0, sizeof(other_pg_sz));
552 : :
553 : : /*
554 : : * go through the memseg lists and take note of all the page sizes available,
555 : : * and whether any of them were specifically requested by the user.
556 : : */
557 : : n_requested_msls = 0;
558 : : n_other_msls = 0;
559 [ + + ]: 74304 : for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
560 : 73728 : struct rte_memseg_list *msl = &mcfg->memsegs[i];
561 : :
562 [ + + ]: 73728 : if (msl->socket_id != socket)
563 : 3024 : continue;
564 : :
565 [ + + ]: 70704 : if (msl->base_va == NULL)
566 : 68400 : continue;
567 : :
568 : : /* if pages of specific size were requested */
569 [ + + - + ]: 2304 : if (size_flags != 0 && check_hugepage_sz(size_flags,
570 : : msl->page_sz))
571 : 0 : requested_msls[n_requested_msls++] = msl;
572 [ + + ]: 2304 : else if (size_flags == 0 || size_hint)
573 : 2296 : other_msls[n_other_msls++] = msl;
574 : : }
575 : :
576 : : /* sort the lists, smallest first */
577 : 576 : qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
578 : : compare_pagesz);
579 : 576 : qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
580 : : compare_pagesz);
581 : :
582 : : /* now, extract page sizes we are supposed to try */
583 : : prev_pg_sz = 0;
584 : : n_requested_pg_sz = 0;
585 [ - + ]: 576 : for (i = 0; i < n_requested_msls; i++) {
586 : 0 : uint64_t pg_sz = requested_msls[i]->page_sz;
587 : :
588 [ # # ]: 0 : if (prev_pg_sz != pg_sz) {
589 : 0 : requested_pg_sz[n_requested_pg_sz++] = pg_sz;
590 : : prev_pg_sz = pg_sz;
591 : : }
592 : : }
593 : : prev_pg_sz = 0;
594 : : n_other_pg_sz = 0;
595 [ + + ]: 2872 : for (i = 0; i < n_other_msls; i++) {
596 : 2296 : uint64_t pg_sz = other_msls[i]->page_sz;
597 : :
598 [ + + ]: 2296 : if (prev_pg_sz != pg_sz) {
599 : 574 : other_pg_sz[n_other_pg_sz++] = pg_sz;
600 : : prev_pg_sz = pg_sz;
601 : : }
602 : : }
603 : :
604 : : /* finally, try allocating memory of specified page sizes, starting from
605 : : * the smallest sizes
606 : : */
607 [ - + ]: 576 : for (i = 0; i < n_requested_pg_sz; i++) {
608 : 0 : uint64_t pg_sz = requested_pg_sz[i];
609 : :
610 : : /*
611 : : * do not pass the size hint here, as user expects other page
612 : : * sizes first, before resorting to best effort allocation.
613 : : */
614 [ # # ]: 0 : if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
615 : : align, bound, contig))
616 : : return 0;
617 : : }
618 [ + + ]: 576 : if (n_other_pg_sz == 0)
619 : : return -1;
620 : :
621 : : /* now, check if we can reserve anything with size hint */
622 : 574 : ret = find_suitable_element(heap, size, flags, align, bound, contig);
623 [ + + ]: 574 : if (ret != NULL)
624 : : return 0;
625 : :
626 : : /*
627 : : * we still couldn't reserve memory, so try expanding heap with other
628 : : * page sizes, if there are any
629 : : */
630 [ + - ]: 463 : for (i = 0; i < n_other_pg_sz; i++) {
631 : 463 : uint64_t pg_sz = other_pg_sz[i];
632 : :
633 [ - + ]: 463 : if (!try_expand_heap(heap, pg_sz, size, socket, flags,
634 : : align, bound, contig))
635 : : return 0;
636 : : }
637 : : return -1;
638 : : }
639 : :
640 : : /* this will try lower page sizes first */
641 : : static void *
642 : 116554 : malloc_heap_alloc_on_heap_id(size_t size, unsigned int heap_id, unsigned int flags, size_t align,
643 : : size_t bound, bool contig)
644 : : {
645 : 116554 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
646 : 116554 : struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
647 : 116554 : unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
648 : : int socket_id;
649 : : void *ret;
650 : : const struct internal_config *internal_conf =
651 : 116554 : eal_get_internal_configuration();
652 : :
653 : 116554 : rte_spinlock_lock(&(heap->lock));
654 : :
655 : : align = align == 0 ? 1 : align;
656 : :
657 : : /* for legacy mode, try once and with all flags */
658 [ + + ]: 116567 : if (internal_conf->legacy_mem) {
659 : 13711 : ret = heap_alloc(heap, size, flags, align, bound, contig);
660 : 13711 : goto alloc_unlock;
661 : : }
662 : :
663 : : /*
664 : : * we do not pass the size hint here, because even if allocation fails,
665 : : * we may still be able to allocate memory from appropriate page sizes;
666 : : * we just need to request more memory first.
667 : : */
668 : :
669 : 102856 : socket_id = rte_socket_id_by_idx(heap_id);
670 : : /*
671 : : * if socket ID is negative, we cannot find a socket ID for this heap -
672 : : * which means it's an external heap. those can have unexpected page
673 : : * sizes, so if the user asked to allocate from there - assume user
674 : : * knows what they're doing, and allow allocating from there with any
675 : : * page size flags.
676 : : */
677 [ + + ]: 102856 : if (socket_id < 0)
678 : 14 : size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
679 : :
680 : 102856 : ret = heap_alloc(heap, size, size_flags, align, bound, contig);
681 [ + + ]: 102856 : if (ret != NULL)
682 : 102280 : goto alloc_unlock;
683 : :
684 : : /* if socket ID is invalid, this is an external heap */
685 [ - + ]: 576 : if (socket_id < 0)
686 : 0 : goto alloc_unlock;
687 : :
688 [ + + ]: 576 : if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
689 : : bound, contig)) {
690 : 574 : ret = heap_alloc(heap, size, flags, align, bound, contig);
691 : :
692 : : /* this should have succeeded */
693 [ + - ]: 574 : if (ret == NULL)
694 : 0 : EAL_LOG(ERR, "Error allocating from heap");
695 : : }
696 : 116567 : alloc_unlock:
697 : : rte_spinlock_unlock(&(heap->lock));
698 : 116567 : return ret;
699 : : }
700 : :
701 : : static unsigned int
702 : 108708 : malloc_get_numa_socket(void)
703 : : {
704 : 108708 : const struct internal_config *conf = eal_get_internal_configuration();
705 : 108704 : unsigned int socket_id = rte_socket_id();
706 : : unsigned int idx;
707 : :
708 [ + + ]: 108704 : if (socket_id != (unsigned int)SOCKET_ID_ANY)
709 : : return socket_id;
710 : :
711 : : /* for control threads, return first socket where memory is available */
712 [ + + ]: 18 : for (idx = 0; idx < rte_socket_count(); idx++) {
713 : 12 : int ret = rte_socket_id_by_idx(idx);
714 : 12 : socket_id = (unsigned int)ret;
715 [ + - - + ]: 12 : if (ret != -1 && conf->numa_mem[socket_id] != 0)
716 : 0 : return socket_id;
717 : : }
718 : : /* We couldn't quickly find a NUMA node where memory was available,
719 : : * so fall back to using main lcore socket ID.
720 : : */
721 : 6 : socket_id = rte_lcore_to_socket_id(rte_get_main_lcore());
722 : : /* Main lcore socket ID may be SOCKET_ID_ANY
723 : : * when main lcore thread is affinitized to multiple NUMA nodes.
724 : : */
725 [ - + ]: 6 : if (socket_id != (unsigned int)SOCKET_ID_ANY)
726 : : return socket_id;
727 : : /* Failed to find meaningful socket ID, so use the first one available. */
728 : 0 : return rte_socket_id_by_idx(0);
729 : : }
730 : :
731 : : void *
732 : 116589 : malloc_heap_alloc(size_t size, int socket_arg, unsigned int flags,
733 : : size_t align, size_t bound, bool contig)
734 : : {
735 : : int socket, heap_id, i;
736 : : void *ret;
737 : :
738 : : /* return NULL if size is 0 or alignment is not power-of-2 */
739 [ + - + - + - ]: 116589 : if (size == 0 || (align && !rte_is_power_of_2(align)))
740 : : return NULL;
741 : :
742 [ + + + + ]: 116589 : if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
743 : : socket_arg = SOCKET_ID_ANY;
744 : :
745 [ + + ]: 102884 : if (socket_arg == SOCKET_ID_ANY)
746 : 108707 : socket = malloc_get_numa_socket();
747 : : else
748 : : socket = socket_arg;
749 : :
750 : : /* turn socket ID into heap ID */
751 : 116585 : heap_id = malloc_socket_to_heap_id(socket);
752 : : /* if heap id is negative, socket ID was invalid */
753 [ + + ]: 116585 : if (heap_id < 0)
754 : : return NULL;
755 : :
756 : 116554 : ret = malloc_heap_alloc_on_heap_id(size, heap_id, flags, align, bound, contig);
757 [ + + ]: 116564 : if (ret != NULL || socket_arg != SOCKET_ID_ANY)
758 : : return ret;
759 : :
760 : : /* try other heaps. we are only iterating through native DPDK sockets,
761 : : * so external heaps won't be included.
762 : : */
763 [ + + ]: 9 : for (i = 0; i < (int) rte_socket_count(); i++) {
764 [ + + ]: 6 : if (i == heap_id)
765 : 3 : continue;
766 : 3 : ret = malloc_heap_alloc_on_heap_id(size, i, flags, align, bound, contig);
767 [ - + ]: 3 : if (ret != NULL)
768 : 0 : return ret;
769 : : }
770 : : return NULL;
771 : : }
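/*
 * Illustrative sketch, not part of the instrumented file: the SOCKET_ID_ANY
 * fallback above lets a plain rte_malloc() succeed even when the caller's
 * own NUMA node is out of memory, while a socket-pinned allocation only
 * tries that one heap. Sizes and socket number are arbitrary examples.
 */
#include <rte_malloc.h>

static void
alloc_examples(void)
{
	/* SOCKET_ID_ANY: local heap first, then the other DPDK socket heaps */
	void *any = rte_malloc(NULL, 1 << 20, 0);
	/* pinned to socket 1: no fallback to other heaps on failure */
	void *pinned = rte_malloc_socket(NULL, 1 << 20, 0, 1);

	rte_free(any);
	rte_free(pinned);
}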
772 : :
773 : : static void *
774 : 3 : heap_alloc_biggest_on_heap_id(unsigned int heap_id,
775 : : unsigned int flags, size_t align, bool contig)
776 : : {
777 : 3 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
778 : 3 : struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
779 : : void *ret;
780 : :
781 : 3 : rte_spinlock_lock(&(heap->lock));
782 : :
783 : : align = align == 0 ? 1 : align;
784 : :
785 : 3 : ret = heap_alloc_biggest(heap, flags, align, contig);
786 : :
787 : : rte_spinlock_unlock(&(heap->lock));
788 : :
789 : 3 : return ret;
790 : : }
791 : :
792 : : void *
793 : 3 : malloc_heap_alloc_biggest(int socket_arg, unsigned int flags, size_t align, bool contig)
794 : : {
795 : : int socket, i, cur_socket, heap_id;
796 : : void *ret;
797 : :
798 : : /* return NULL if align is not power-of-2 */
799 [ + - + - ]: 3 : if ((align && !rte_is_power_of_2(align)))
800 : : return NULL;
801 : :
802 [ + - ]: 3 : if (!rte_eal_has_hugepages())
803 : : socket_arg = SOCKET_ID_ANY;
804 : :
805 [ + + ]: 3 : if (socket_arg == SOCKET_ID_ANY)
806 : 1 : socket = malloc_get_numa_socket();
807 : : else
808 : : socket = socket_arg;
809 : :
810 : : /* turn socket ID into heap ID */
811 : 3 : heap_id = malloc_socket_to_heap_id(socket);
812 : : /* if heap id is negative, socket ID was invalid */
813 [ + - ]: 3 : if (heap_id < 0)
814 : : return NULL;
815 : :
816 : 3 : ret = heap_alloc_biggest_on_heap_id(heap_id, flags, align, contig);
817 [ - + ]: 3 : if (ret != NULL || socket_arg != SOCKET_ID_ANY)
818 : : return ret;
819 : :
820 : : /* try other heaps */
821 [ # # ]: 0 : for (i = 0; i < (int) rte_socket_count(); i++) {
822 : 0 : cur_socket = rte_socket_id_by_idx(i);
823 [ # # ]: 0 : if (cur_socket == socket)
824 : 0 : continue;
825 : 0 : ret = heap_alloc_biggest_on_heap_id(i, flags, align, contig);
826 [ # # ]: 0 : if (ret != NULL)
827 : 0 : return ret;
828 : : }
829 : : return NULL;
830 : : }
831 : :
832 : : /* this function is exposed in malloc_mp.h */
833 : : int
834 : 470 : malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
835 : : {
836 : : int n_segs, seg_idx, max_seg_idx;
837 : : struct rte_memseg_list *msl;
838 : : size_t page_sz;
839 : :
840 : 470 : msl = rte_mem_virt2memseg_list(aligned_start);
841 [ + - ]: 470 : if (msl == NULL)
842 : : return -1;
843 : :
844 : 470 : page_sz = (size_t)msl->page_sz;
845 : 470 : n_segs = aligned_len / page_sz;
846 : 470 : seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
847 : 470 : max_seg_idx = seg_idx + n_segs;
848 : :
849 [ + + ]: 1472 : for (; seg_idx < max_seg_idx; seg_idx++) {
850 : : struct rte_memseg *ms;
851 : :
852 : 1002 : ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
853 : 1002 : eal_memalloc_free_seg(ms);
854 : : }
855 : : return 0;
856 : : }
857 : :
858 : : int
859 : 108048 : malloc_heap_free(struct malloc_elem *elem)
860 : : {
861 : : struct malloc_heap *heap;
862 : : void *start, *aligned_start, *end, *aligned_end;
863 : : size_t len, aligned_len, page_sz;
864 : : struct rte_memseg_list *msl;
865 : : unsigned int i, n_segs, before_space, after_space;
866 : : int ret;
867 : : bool unmapped = false;
868 : : const struct internal_config *internal_conf =
869 : 108048 : eal_get_internal_configuration();
870 : :
871 [ + - + - ]: 108048 : if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
872 : : return -1;
873 : :
874 : : asan_clear_redzone(elem);
875 : :
876 : : /* elem may be merged with previous element, so keep heap address */
877 : 108048 : heap = elem->heap;
878 : 108048 : msl = elem->msl;
879 : 108048 : page_sz = (size_t)msl->page_sz;
880 : :
881 : 108048 : rte_spinlock_lock(&(heap->lock));
882 : :
883 : : void *asan_ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN + elem->pad);
884 : : size_t asan_data_len = elem->size - MALLOC_ELEM_OVERHEAD - elem->pad;
885 : :
886 : : /* mark element as free */
887 : 108049 : elem->state = ELEM_FREE;
888 : :
889 : 108049 : elem = malloc_elem_free(elem);
890 : :
891 : : /* anything after this is a bonus */
892 : : ret = 0;
893 : :
894 : : /* ...of which we can't avail ourselves if we are in legacy mode, or if this
895 : : * is an externally allocated segment.
896 : : */
897 [ + + + + ]: 108049 : if (internal_conf->legacy_mem || (msl->external > 0))
898 : 5377 : goto free_unlock;
899 : :
900 : : /* check if we can free any memory back to the system */
901 [ + + ]: 102672 : if (elem->size < page_sz)
902 : 102184 : goto free_unlock;
903 : :
904 : : /* if user requested to match allocations, the sizes must match - if not,
905 : : * we will defer freeing these hugepages until the entire original allocation
906 : : * can be freed
907 : : */
908 [ - + - - ]: 488 : if (internal_conf->match_allocations && elem->size != elem->orig_size)
909 : 0 : goto free_unlock;
910 : :
911 : : /* probably, but let's make sure, as we may not be using up a full page */
912 : : start = elem;
913 : : len = elem->size;
914 : 488 : aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
915 : 488 : end = RTE_PTR_ADD(elem, len);
916 : 488 : aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);
917 : :
918 : 488 : aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
919 : :
920 : : /* can't free anything */
921 [ + + ]: 488 : if (aligned_len < page_sz)
922 : 7 : goto free_unlock;
923 : :
924 : : /* we can free something. however, some of these pages may be marked as
925 : : * unfreeable, so also check that as well
926 : : */
927 : 481 : n_segs = aligned_len / page_sz;
928 [ + + ]: 1572 : for (i = 0; i < n_segs; i++) {
929 : : const struct rte_memseg *tmp =
930 : 1091 : rte_mem_virt2memseg(aligned_start, msl);
931 : :
932 [ + + ]: 1091 : if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
933 : : /* this is an unfreeable segment, so move start */
934 : 89 : aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
935 : : }
936 : : }
937 : :
938 : : /* recalculate length and number of segments */
939 : 481 : aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
940 : 481 : n_segs = aligned_len / page_sz;
941 : :
942 : : /* check if we can still free some pages */
943 [ + + ]: 481 : if (n_segs == 0)
944 : 11 : goto free_unlock;
945 : :
946 : : /* We're not done yet. We also have to check if by freeing space we will
947 : : * be leaving free elements that are too small to store new elements.
948 : : * Check if we have enough space in the beginning and at the end, or if
949 : : * start/end are exactly page aligned.
950 : : */
951 : 470 : before_space = RTE_PTR_DIFF(aligned_start, elem);
952 : 470 : after_space = RTE_PTR_DIFF(end, aligned_end);
953 [ + + - + ]: 470 : if (before_space != 0 &&
954 : : before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
955 : : /* There is not enough space before start, but we may be able to
956 : : * move the start forward by one page.
957 : : */
958 [ # # ]: 0 : if (n_segs == 1)
959 : 0 : goto free_unlock;
960 : :
961 : : /* move start */
962 : 0 : aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
963 : 0 : aligned_len -= page_sz;
964 : 0 : n_segs--;
965 : : }
966 [ + + - + ]: 470 : if (after_space != 0 && after_space <
967 : : MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
968 : : /* There is not enough space after end, but we may be able to
969 : : * move the end backwards by one page.
970 : : */
971 [ # # ]: 0 : if (n_segs == 1)
972 : 0 : goto free_unlock;
973 : :
974 : : /* move end */
975 : : aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
976 : 0 : aligned_len -= page_sz;
977 : : n_segs--;
978 : : }
979 : :
980 : : /* now we can finally free us some pages */
981 : :
982 : 470 : rte_mcfg_mem_write_lock();
983 : :
984 : : /*
985 : : * we allow secondary processes to clear the heap of this allocated
986 : : * memory because it is safe to do so, as even if notifications about
987 : : * unmapped pages don't make it to other processes, the heap is shared
988 : : * across all processes and will become empty of this memory anyway.
989 : : * Nothing can allocate it back unless the primary process is able
990 : : * to deliver the allocation message to every single running process.
991 : : */
992 : :
993 : 470 : malloc_elem_free_list_remove(elem);
994 : :
995 : 470 : malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);
996 : :
997 : 470 : heap->total_size -= aligned_len;
998 : :
999 [ + - ]: 470 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1000 : : /* notify user about changes in memory map */
1001 : 470 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
1002 : : aligned_start, aligned_len);
1003 : :
1004 : : /* don't care if any of this fails */
1005 : 470 : malloc_heap_free_pages(aligned_start, aligned_len);
1006 : :
1007 : 470 : request_sync();
1008 : : } else {
1009 : : struct malloc_mp_req req;
1010 : :
1011 : : memset(&req, 0, sizeof(req));
1012 : :
1013 : 0 : req.t = REQ_TYPE_FREE;
1014 : 0 : req.free_req.addr = aligned_start;
1015 : 0 : req.free_req.len = aligned_len;
1016 : :
1017 : : /*
1018 : : * we request primary to deallocate pages, but we don't do it
1019 : : * in this thread. instead, we notify primary that we would like
1020 : : * to deallocate pages, and this process will receive another
1021 : : * request (in parallel) that will do it for us on another
1022 : : * thread.
1023 : : *
1024 : : * we also don't really care if this succeeds - the data is
1025 : : * already removed from the heap, so it is, for all intents and
1026 : : * purposes, hidden from the rest of DPDK even if some other
1027 : : * process (including this one) may have these pages mapped.
1028 : : *
1029 : : * notifications about deallocated memory happen during sync.
1030 : : */
1031 : 0 : request_to_primary(&req);
1032 : : }
1033 : :
1034 : : /* we didn't exit early, meaning we have unmapped some pages */
1035 : : unmapped = true;
1036 : :
1037 : 470 : EAL_LOG(DEBUG, "Heap on socket %d was shrunk by %zdMB",
1038 : : msl->socket_id, aligned_len >> 20ULL);
1039 : :
1040 : 470 : rte_mcfg_mem_write_unlock();
1041 : 108049 : free_unlock:
1042 : : asan_set_freezone(asan_ptr, asan_data_len);
1043 : :
1044 : : /* if we unmapped some memory, we need to do additional work for ASan */
1045 : : if (unmapped) {
1046 : : void *asan_end = RTE_PTR_ADD(asan_ptr, asan_data_len);
1047 : : void *aligned_end = RTE_PTR_ADD(aligned_start, aligned_len);
1048 : : void *aligned_trailer = RTE_PTR_SUB(aligned_start,
1049 : : MALLOC_ELEM_TRAILER_LEN);
1050 : :
1051 : : /*
1052 : : * There was a memory area that was unmapped. This memory area
1053 : : * will have to be marked as available for ASan, because we will
1054 : : * want to use it next time it gets mapped again. The OS memory
1055 : : * protection should trigger a fault on access to these areas
1056 : : * anyway, so we are not giving up any protection.
1057 : : */
1058 : : asan_set_zone(aligned_start, aligned_len, 0x00);
1059 : :
1060 : : /*
1061 : : * ...however, when we unmap pages, we create new free elements
1062 : : * which might have been marked as "freed" with an earlier
1063 : : * `asan_set_freezone` call. So, if there is an area past the
1064 : : * unmapped space that was marked as freezone for ASan, we need
1065 : : * to mark the malloc header as available.
1066 : : */
1067 : : if (asan_end > aligned_end)
1068 : : asan_set_zone(aligned_end, MALLOC_ELEM_HEADER_LEN, 0x00);
1069 : :
1070 : : /* if there's space before unmapped memory, mark as available */
1071 : : if (asan_ptr < aligned_start)
1072 : : asan_set_zone(aligned_trailer, MALLOC_ELEM_TRAILER_LEN, 0x00);
1073 : : }
1074 : :
1075 : : rte_spinlock_unlock(&(heap->lock));
1076 : 108049 : return ret;
1077 : : }
1078 : :
1079 : : int
1080 : 20 : malloc_heap_resize(struct malloc_elem *elem, size_t size)
1081 : : {
1082 : : int ret;
1083 : :
1084 [ + - + - ]: 20 : if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
1085 : : return -1;
1086 : :
1087 : 20 : rte_spinlock_lock(&(elem->heap->lock));
1088 : :
1089 : 20 : ret = malloc_elem_resize(elem, size);
1090 : :
1091 : 20 : rte_spinlock_unlock(&(elem->heap->lock));
1092 : :
1093 : 20 : return ret;
1094 : : }
1095 : :
1096 : : /*
1097 : : * Function to retrieve data for a given heap
1098 : : */
1099 : : int
1100 : 110 : malloc_heap_get_stats(struct malloc_heap *heap,
1101 : : struct rte_malloc_socket_stats *socket_stats)
1102 : : {
1103 : : size_t idx;
1104 : : struct malloc_elem *elem;
1105 : :
1106 : 110 : rte_spinlock_lock(&heap->lock);
1107 : :
1108 : : /* Initialise variables for heap */
1109 : 110 : socket_stats->free_count = 0;
1110 : 110 : socket_stats->heap_freesz_bytes = 0;
1111 : 110 : socket_stats->greatest_free_size = 0;
1112 : :
1113 : : /* Iterate through free list */
1114 [ + + ]: 1540 : for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
1115 : 1430 : for (elem = LIST_FIRST(&heap->free_head[idx]);
1116 [ + + ]: 1445 : !!elem; elem = LIST_NEXT(elem, free_list))
1117 : : {
1118 : 15 : socket_stats->free_count++;
1119 : 15 : socket_stats->heap_freesz_bytes += elem->size;
1120 [ + - ]: 15 : if (elem->size > socket_stats->greatest_free_size)
1121 : 15 : socket_stats->greatest_free_size = elem->size;
1122 : : }
1123 : : }
1124 : : /* Get stats on overall heap and allocated memory on this heap */
1125 : 110 : socket_stats->heap_totalsz_bytes = heap->total_size;
1126 : 110 : socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
1127 : 110 : socket_stats->heap_freesz_bytes);
1128 : 110 : socket_stats->alloc_count = heap->alloc_count;
1129 : :
1130 : : rte_spinlock_unlock(&heap->lock);
1131 : 110 : return 0;
1132 : : }
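/*
 * Illustrative sketch, not part of the instrumented file: the per-heap
 * statistics filled in above are normally read through the public
 * rte_malloc_get_socket_stats() wrapper.
 */
#include <stdio.h>
#include <rte_malloc.h>

static void
dump_socket_stats(int socket)
{
	struct rte_malloc_socket_stats stats;

	if (rte_malloc_get_socket_stats(socket, &stats) < 0)
		return;
	printf("socket %d: total %zu, free %zu, biggest free %zu\n",
		socket, stats.heap_totalsz_bytes, stats.heap_freesz_bytes,
		stats.greatest_free_size);
}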
1133 : :
1134 : : /*
1135 : : * Function to retrieve data for a given heap
1136 : : */
1137 : : void
1138 : 0 : malloc_heap_dump(struct malloc_heap *heap, FILE *f)
1139 : : {
1140 : : struct malloc_elem *elem;
1141 : :
1142 : 0 : rte_spinlock_lock(&heap->lock);
1143 : :
1144 : 0 : fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
1145 : 0 : fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
1146 : :
1147 : 0 : elem = heap->first;
1148 [ # # ]: 0 : while (elem) {
1149 : 0 : malloc_elem_dump(elem, f);
1150 : 0 : elem = elem->next;
1151 : : }
1152 : :
1153 : : rte_spinlock_unlock(&heap->lock);
1154 : 0 : }
1155 : :
1156 : : static int
1157 : 2 : destroy_elem(struct malloc_elem *elem, size_t len)
1158 : : {
1159 : 2 : struct malloc_heap *heap = elem->heap;
1160 : :
1161 : : /* notify all subscribers that a memory area is going to be removed */
1162 : 2 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);
1163 : :
1164 : : /* this element can be removed */
1165 : 2 : malloc_elem_free_list_remove(elem);
1166 : 2 : malloc_elem_hide_region(elem, elem, len);
1167 : :
1168 : 2 : heap->total_size -= len;
1169 : :
1170 : : memset(elem, 0, sizeof(*elem));
1171 : :
1172 : 2 : return 0;
1173 : : }
1174 : :
1175 : : struct rte_memseg_list *
1176 : 2 : malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
1177 : : unsigned int n_pages, size_t page_sz, const char *seg_name,
1178 : : unsigned int socket_id)
1179 : : {
1180 : 2 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1181 : : char fbarray_name[RTE_FBARRAY_NAME_LEN];
1182 : : struct rte_memseg_list *msl = NULL;
1183 : : struct rte_fbarray *arr;
1184 : 2 : size_t seg_len = n_pages * page_sz;
1185 : : unsigned int i;
1186 : :
1187 : : /* first, find a free memseg list */
1188 [ + - ]: 18 : for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
1189 : 18 : struct rte_memseg_list *tmp = &mcfg->memsegs[i];
1190 [ + + ]: 18 : if (tmp->base_va == NULL) {
1191 : : msl = tmp;
1192 : : break;
1193 : : }
1194 : : }
1195 [ - + ]: 2 : if (msl == NULL) {
1196 : 0 : EAL_LOG(ERR, "Couldn't find empty memseg list");
1197 : 0 : rte_errno = ENOSPC;
1198 : 0 : return NULL;
1199 : : }
1200 : :
1201 : : snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p",
1202 : : seg_name, va_addr);
1203 : :
1204 : : /* create the backing fbarray */
1205 [ - + ]: 2 : if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
1206 : : sizeof(struct rte_memseg)) < 0) {
1207 : 0 : EAL_LOG(ERR, "Couldn't create fbarray backing the memseg list");
1208 : 0 : return NULL;
1209 : : }
1210 : : arr = &msl->memseg_arr;
1211 : :
1212 : : /* fbarray created, fill it up */
1213 [ + + ]: 1028 : for (i = 0; i < n_pages; i++) {
1214 : : struct rte_memseg *ms;
1215 : :
1216 : 1026 : rte_fbarray_set_used(arr, i);
1217 : 1026 : ms = rte_fbarray_get(arr, i);
1218 : 1026 : ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
1219 [ - + ]: 1026 : ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
1220 : 1026 : ms->hugepage_sz = page_sz;
1221 : 1026 : ms->len = page_sz;
1222 : 1026 : ms->nchannel = rte_memory_get_nchannel();
1223 : 1026 : ms->nrank = rte_memory_get_nrank();
1224 : 1026 : ms->socket_id = socket_id;
1225 : : }
1226 : :
1227 : : /* set up the memseg list */
1228 : 2 : msl->base_va = va_addr;
1229 : 2 : msl->page_sz = page_sz;
1230 : 2 : msl->socket_id = socket_id;
1231 : 2 : msl->len = seg_len;
1232 : 2 : msl->version = 0;
1233 : 2 : msl->external = 1;
1234 : :
1235 : 2 : return msl;
1236 : : }
1237 : :
1238 : : struct extseg_walk_arg {
1239 : : void *va_addr;
1240 : : size_t len;
1241 : : struct rte_memseg_list *msl;
1242 : : };
1243 : :
1244 : : static int
1245 : 18 : extseg_walk(const struct rte_memseg_list *msl, void *arg)
1246 : : {
1247 : 18 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1248 : : struct extseg_walk_arg *wa = arg;
1249 : :
1250 [ + + + - ]: 18 : if (msl->base_va == wa->va_addr && msl->len == wa->len) {
1251 : : unsigned int found_idx;
1252 : :
1253 : : /* msl is const */
1254 : 2 : found_idx = msl - mcfg->memsegs;
1255 : 2 : wa->msl = &mcfg->memsegs[found_idx];
1256 : 2 : return 1;
1257 : : }
1258 : : return 0;
1259 : : }
1260 : :
1261 : : struct rte_memseg_list *
1262 : 2 : malloc_heap_find_external_seg(void *va_addr, size_t len)
1263 : : {
1264 : : struct extseg_walk_arg wa;
1265 : : int res;
1266 : :
1267 : 2 : wa.va_addr = va_addr;
1268 : 2 : wa.len = len;
1269 : :
1270 : 2 : res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa);
1271 : :
1272 [ - + ]: 2 : if (res != 1) {
1273 : : /* 0 means nothing was found, -1 shouldn't happen */
1274 [ # # ]: 0 : if (res == 0)
1275 : 0 : rte_errno = ENOENT;
1276 : 0 : return NULL;
1277 : : }
1278 : 2 : return wa.msl;
1279 : : }
1280 : :
1281 : : int
1282 : 2 : malloc_heap_destroy_external_seg(struct rte_memseg_list *msl)
1283 : : {
1284 : : /* destroy the fbarray backing this memory */
1285 [ + - ]: 2 : if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
1286 : : return -1;
1287 : :
1288 : : /* reset the memseg list */
1289 : : memset(msl, 0, sizeof(*msl));
1290 : :
1291 : 2 : return 0;
1292 : : }
1293 : :
1294 : : int
1295 : 2 : malloc_heap_add_external_memory(struct malloc_heap *heap,
1296 : : struct rte_memseg_list *msl)
1297 : : {
1298 : : /* erase contents of new memory */
1299 : 2 : memset(msl->base_va, 0, msl->len);
1300 : :
1301 : : /* now, add newly minted memory to the malloc heap */
1302 : 2 : malloc_heap_add_memory(heap, msl, msl->base_va, msl->len, false);
1303 : :
1304 : 2 : heap->total_size += msl->len;
1305 : :
1306 : : /* all done! */
1307 : 2 : EAL_LOG(DEBUG, "Added segment for heap %s starting at %p",
1308 : : heap->name, msl->base_va);
1309 : :
1310 : : /* notify all subscribers that a new memory area has been added */
1311 : 2 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
1312 : 2 : msl->base_va, msl->len);
1313 : :
1314 : 2 : return 0;
1315 : : }
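/*
 * Illustrative sketch, not part of the instrumented file: the external
 * memory helpers above are driven by the public heap API roughly as below.
 * The heap name, buffer and page size are hypothetical; error handling and
 * heap teardown are omitted.
 */
#include <rte_malloc.h>

static int
add_user_memory(void *va, size_t len, size_t page_sz)
{
	unsigned int n_pages = len / page_sz;

	if (rte_malloc_heap_create("user_heap") < 0)
		return -1;
	/* NULL iova_addrs: IOVAs are recorded as RTE_BAD_IOVA (see above) */
	return rte_malloc_heap_memory_add("user_heap", va, len, NULL,
			n_pages, page_sz);
}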
1316 : :
1317 : : int
1318 : 2 : malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
1319 : : size_t len)
1320 : : {
1321 : 2 : struct malloc_elem *elem = heap->first;
1322 : :
1323 : : /* find element with specified va address */
1324 [ - + ]: 2 : while (elem != NULL && elem != va_addr) {
1325 : 0 : elem = elem->next;
1326 : : /* stop if we've blown past our VA */
1327 [ # # ]: 0 : if (elem > (struct malloc_elem *)va_addr) {
1328 : 0 : rte_errno = ENOENT;
1329 : 0 : return -1;
1330 : : }
1331 : : }
1332 : : /* check if element was found */
1333 [ + - - + ]: 2 : if (elem == NULL || elem->msl->len != len) {
1334 : 0 : rte_errno = ENOENT;
1335 : 0 : return -1;
1336 : : }
1337 : : /* if element's size is not equal to segment len, segment is busy */
1338 [ + - - + ]: 2 : if (elem->state == ELEM_BUSY || elem->size != len) {
1339 : 0 : rte_errno = EBUSY;
1340 : 0 : return -1;
1341 : : }
1342 : 2 : return destroy_elem(elem, len);
1343 : : }
1344 : :
1345 : : int
1346 : 2 : malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
1347 : : {
1348 : 2 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1349 : 2 : uint32_t next_socket_id = mcfg->next_socket_id;
1350 : :
1351 : : /* prevent overflow. did you really create 2 billion heaps??? */
1352 [ - + ]: 2 : if (next_socket_id > INT32_MAX) {
1353 : 0 : EAL_LOG(ERR, "Cannot assign new socket ID's");
1354 : 0 : rte_errno = ENOSPC;
1355 : 0 : return -1;
1356 : : }
1357 : :
1358 : : /* initialize empty heap */
1359 : 2 : heap->alloc_count = 0;
1360 : 2 : heap->first = NULL;
1361 : 2 : heap->last = NULL;
1362 : 2 : LIST_INIT(heap->free_head);
1363 : : rte_spinlock_init(&heap->lock);
1364 : 2 : heap->total_size = 0;
1365 : 2 : heap->socket_id = next_socket_id;
1366 : :
1367 : : /* we hold a global mem hotplug writelock, so it's safe to increment */
1368 : 2 : mcfg->next_socket_id++;
1369 : :
1370 : : /* set up name */
1371 : 2 : strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
1372 : 2 : return 0;
1373 : : }
1374 : :
1375 : : int
1376 : 2 : malloc_heap_destroy(struct malloc_heap *heap)
1377 : : {
1378 [ - + ]: 2 : if (heap->alloc_count != 0) {
1379 : 0 : EAL_LOG(ERR, "Heap is still in use");
1380 : 0 : rte_errno = EBUSY;
1381 : 0 : return -1;
1382 : : }
1383 [ + - - + ]: 2 : if (heap->first != NULL || heap->last != NULL) {
1384 : 0 : EAL_LOG(ERR, "Heap still contains memory segments");
1385 : 0 : rte_errno = EBUSY;
1386 : 0 : return -1;
1387 : : }
1388 [ - + ]: 2 : if (heap->total_size != 0)
1389 : 0 : EAL_LOG(ERR, "Total size not zero, heap is likely corrupt");
1390 : :
1391 : : /* Reset all of the heap except the (held) lock, so the caller can release it. */
1392 : : RTE_BUILD_BUG_ON(offsetof(struct malloc_heap, lock) != 0);
1393 : 2 : memset(RTE_PTR_ADD(heap, sizeof(heap->lock)), 0,
1394 : : sizeof(*heap) - sizeof(heap->lock));
1395 : :
1396 : 2 : return 0;
1397 : : }
1398 : :
1399 : : int
1400 : 182 : rte_eal_malloc_heap_init(void)
1401 : : {
1402 : 182 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1403 : : unsigned int i;
1404 : : const struct internal_config *internal_conf =
1405 : 182 : eal_get_internal_configuration();
1406 : :
1407 [ - + ]: 182 : if (internal_conf->match_allocations)
1408 : 0 : EAL_LOG(DEBUG, "Hugepages will be freed exactly as allocated.");
1409 : :
1410 [ + + ]: 182 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1411 : : /* assign min socket ID to external heaps */
1412 : 157 : mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
1413 : :
1414 : : /* assign names to default DPDK heaps */
1415 [ + + ]: 471 : for (i = 0; i < rte_socket_count(); i++) {
1416 : : struct malloc_heap *heap = &mcfg->malloc_heaps[i];
1417 : : char heap_name[RTE_HEAP_NAME_MAX_LEN];
1418 : 314 : int socket_id = rte_socket_id_by_idx(i);
1419 : :
1420 : : snprintf(heap_name, sizeof(heap_name),
1421 : : "socket_%i", socket_id);
1422 : 314 : strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
1423 : 314 : heap->socket_id = socket_id;
1424 : : }
1425 : : }
1426 : :
1427 [ - + ]: 182 : if (register_mp_requests()) {
1428 : 0 : EAL_LOG(ERR, "Couldn't register malloc multiprocess actions");
1429 : 0 : return -1;
1430 : : }
1431 : :
1432 : : return 0;
1433 : : }
1434 : :
1435 : 182 : int rte_eal_malloc_heap_populate(void)
1436 : : {
1437 : : /* mem hotplug is unlocked here. it's safe for primary as no requests can
1438 : : * even come before primary itself is fully initialized, and secondaries
1439 : : * do not need to initialize the heap.
1440 : : */
1441 : :
1442 : : /* secondary process does not need to initialize anything */
1443 [ + + ]: 182 : if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1444 : : return 0;
1445 : :
1446 : : /* add all IOVA-contiguous areas to the heap */
1447 : 157 : return rte_memseg_contig_walk(malloc_add_seg, NULL);
1448 : : }
1449 : :
1450 : : void
1451 : 254 : rte_eal_malloc_heap_cleanup(void)
1452 : : {
1453 : 254 : unregister_mp_requests();
1454 : 254 : }