Branch data Line data Source code
1 : : /* SPDX-License-Identifier: BSD-3-Clause
2 : : * Copyright(c) 2010-2014 Intel Corporation
3 : : */
4 : : #include <stdint.h>
5 : : #include <stddef.h>
6 : : #include <stdlib.h>
7 : : #include <stdio.h>
8 : : #include <errno.h>
9 : : #include <sys/queue.h>
10 : :
11 : : #include <rte_memory.h>
12 : : #include <rte_errno.h>
13 : : #include <rte_eal.h>
14 : : #include <rte_eal_memconfig.h>
15 : : #include <rte_lcore.h>
16 : : #include <rte_common.h>
17 : : #include <rte_string_fns.h>
18 : : #include <rte_spinlock.h>
19 : : #include <rte_memzone.h>
20 : : #include <rte_fbarray.h>
21 : :
22 : : #include "eal_internal_cfg.h"
23 : : #include "eal_memalloc.h"
24 : : #include "eal_memcfg.h"
25 : : #include "eal_private.h"
26 : : #include "malloc_elem.h"
27 : : #include "malloc_heap.h"
28 : : #include "malloc_mp.h"
29 : :
30 : : /* start external socket IDs at a very high number */
31 : : #define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant expression */
32 : : #define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))
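RTE_MAX is built from a GCC statement expression, so it is not an integer constant expression and cannot appear in contexts such as file-scope array sizes; the plain ternary above can. A hypothetical illustration:

	/* hypothetical illustration: the plain ternary works in constant
	 * contexts, a statement-expression macro such as RTE_MAX does not
	 */
	static int lookup[CONST_MAX(1 << 8, 16)];    /* OK at file scope */
	/* static int bad[RTE_MAX(1 << 8, 16)]; */   /* would not compile */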
33 : :
34 : : static unsigned
35 : 99795 : check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
36 : : {
37 : : unsigned check_flag = 0;
38 : :
39 [ + + ]: 99795 : if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY))
40 : : return 1;
41 : :
42 [ - + - - - - - - + ] : 1192 : switch (hugepage_sz) {
43 : 0 : case RTE_PGSIZE_256K:
44 : : check_flag = RTE_MEMZONE_256KB;
45 : 0 : break;
46 : 1117 : case RTE_PGSIZE_2M:
47 : : check_flag = RTE_MEMZONE_2MB;
48 : 1117 : break;
49 : 0 : case RTE_PGSIZE_16M:
50 : : check_flag = RTE_MEMZONE_16MB;
51 : 0 : break;
52 : 0 : case RTE_PGSIZE_256M:
53 : : check_flag = RTE_MEMZONE_256MB;
54 : 0 : break;
55 : 0 : case RTE_PGSIZE_512M:
56 : : check_flag = RTE_MEMZONE_512MB;
57 : 0 : break;
58 : 0 : case RTE_PGSIZE_1G:
59 : : check_flag = RTE_MEMZONE_1GB;
60 : 0 : break;
61 : 0 : case RTE_PGSIZE_4G:
62 : : check_flag = RTE_MEMZONE_4GB;
63 : 0 : break;
64 : 0 : case RTE_PGSIZE_16G:
65 : : check_flag = RTE_MEMZONE_16GB;
66 : : }
67 : :
68 : 1192 : return check_flag & flags;
69 : : }
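At the public API level these flags surface through rte_memzone_reserve(); a minimal sketch, assuming an arbitrary zone name and a 1 MB reservation:

	#include <rte_lcore.h>
	#include <rte_memzone.h>

	/* ask for 2 MB pages, but let RTE_MEMZONE_SIZE_HINT_ONLY downgrade
	 * the page-size constraint to a preference (zone name and size are
	 * illustrative)
	 */
	static const struct rte_memzone *
	reserve_prefer_2mb(void)
	{
		return rte_memzone_reserve("example_mz", 1 << 20,
				rte_socket_id(),
				RTE_MEMZONE_2MB | RTE_MEMZONE_SIZE_HINT_ONLY);
	}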
70 : :
71 : : int
72 : 97844 : malloc_socket_to_heap_id(unsigned int socket_id)
73 : : {
74 : 97844 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
75 : : int i;
76 : :
77 [ + + ]: 98865 : for (i = 0; i < RTE_MAX_HEAPS; i++) {
78 : : struct malloc_heap *heap = &mcfg->malloc_heaps[i];
79 : :
80 [ + + ]: 98834 : if (heap->socket_id == socket_id)
81 : 97812 : return i;
82 : : }
83 : : return -1;
84 : : }
85 : :
86 : : /*
87 : : * Expand the heap with a memory area.
88 : : */
89 : : static struct malloc_elem *
90 : 1224 : malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
91 : : void *start, size_t len, bool dirty)
92 : : {
93 : : struct malloc_elem *elem = start;
94 : :
95 : 1224 : malloc_elem_init(elem, heap, msl, len, elem, len, dirty);
96 : :
97 : 1224 : malloc_elem_insert(elem);
98 : :
99 : 1224 : elem = malloc_elem_join_adjacent_free(elem);
100 : :
101 : 1224 : malloc_elem_free_list_insert(elem);
102 : :
103 : 1224 : return elem;
104 : : }
105 : :
106 : : static int
107 : 106 : malloc_add_seg(const struct rte_memseg_list *msl,
108 : : const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
109 : : {
110 : 106 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
111 : : struct rte_memseg_list *found_msl;
112 : : struct malloc_heap *heap;
113 : : int msl_idx, heap_idx;
114 : :
115 [ + - ]: 106 : if (msl->external)
116 : : return 0;
117 : :
118 : 106 : heap_idx = malloc_socket_to_heap_id(msl->socket_id);
119 [ - + ]: 106 : if (heap_idx < 0) {
120 : 0 : EAL_LOG(ERR, "Memseg list has invalid socket id");
121 : 0 : return -1;
122 : : }
123 : 106 : heap = &mcfg->malloc_heaps[heap_idx];
124 : :
125 : : /* msl is const, so find it */
126 : 106 : msl_idx = msl - mcfg->memsegs;
127 : :
128 [ + - ]: 106 : if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
129 : : return -1;
130 : :
131 : 106 : found_msl = &mcfg->memsegs[msl_idx];
132 : :
133 : 106 : malloc_heap_add_memory(heap, found_msl, ms->addr, len,
134 : 106 : ms->flags & RTE_MEMSEG_FLAG_DIRTY);
135 : :
136 : 106 : heap->total_size += len;
137 : :
138 : 106 : EAL_LOG(DEBUG, "Added %zuM to heap on socket %i", len >> 20,
139 : : msl->socket_id);
140 : 106 : return 0;
141 : : }
142 : :
143 : : /*
144 : : * Iterates through the freelist for a heap to find a free element
145 : : * which can store data of the required size and with the requested alignment.
146 : : * If size is 0, find the biggest available elem.
147 : : * Returns null on failure, or pointer to element on success.
148 : : */
149 : : static struct malloc_elem *
150 : 101256 : find_suitable_element(struct malloc_heap *heap, size_t size,
151 : : unsigned int flags, size_t align, size_t bound, bool contig)
152 : : {
153 : : size_t idx;
154 : : struct malloc_elem *elem, *alt_elem = NULL;
155 : :
156 : 101256 : for (idx = malloc_elem_free_list_index(size);
157 [ + + ]: 497636 : idx < RTE_HEAP_NUM_FREELISTS; idx++) {
158 : 494982 : for (elem = LIST_FIRST(&heap->free_head[idx]);
159 [ + + ]: 499608 : !!elem; elem = LIST_NEXT(elem, free_list)) {
160 [ + + ]: 103228 : if (malloc_elem_can_hold(elem, size, align, bound,
161 : : contig)) {
162 : 99303 : if (check_hugepage_sz(flags,
163 [ + + ]: 99303 : elem->msl->page_sz))
164 : 98602 : return elem;
165 [ + + ]: 701 : if (alt_elem == NULL)
166 : : alt_elem = elem;
167 : : }
168 : : }
169 : : }
170 : :
171 [ + + ]: 2654 : if (flags & RTE_MEMZONE_SIZE_HINT_ONLY)
172 : 334 : return alt_elem;
173 : :
174 : : return NULL;
175 : : }
176 : :
177 : : /*
178 : : * Iterates through the freelist for a heap to find a free element with the
179 : : * biggest size and requested alignment. Will also set size to whatever element
180 : : * size that was found.
181 : : * Returns null on failure, or pointer to element on success.
182 : : */
183 : : static struct malloc_elem *
184 : 3 : find_biggest_element(struct malloc_heap *heap, size_t *size,
185 : : unsigned int flags, size_t align, bool contig)
186 : : {
187 : : struct malloc_elem *elem, *max_elem = NULL;
188 : : size_t idx, max_size = 0;
189 : :
190 [ + + ]: 42 : for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
191 : 39 : for (elem = LIST_FIRST(&heap->free_head[idx]);
192 [ + + ]: 43 : !!elem; elem = LIST_NEXT(elem, free_list)) {
193 : : size_t cur_size;
194 [ + - ]: 4 : if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
195 : 4 : !check_hugepage_sz(flags,
196 [ - + ]: 4 : elem->msl->page_sz))
197 : 0 : continue;
198 [ - + ]: 4 : if (contig) {
199 : : cur_size =
200 : 0 : malloc_elem_find_max_iova_contig(elem,
201 : : align);
202 : : } else {
203 : 4 : void *data_start = RTE_PTR_ADD(elem,
204 : : MALLOC_ELEM_HEADER_LEN);
205 : 4 : void *data_end = RTE_PTR_ADD(elem, elem->size -
206 : : MALLOC_ELEM_TRAILER_LEN);
207 : 4 : void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
208 : : align);
209 : : /* check if aligned data start is beyond end */
210 [ - + ]: 4 : if (aligned >= data_end)
211 : 0 : continue;
212 : 4 : cur_size = RTE_PTR_DIFF(data_end, aligned);
213 : : }
214 [ + - ]: 4 : if (cur_size > max_size) {
215 : : max_size = cur_size;
216 : : max_elem = elem;
217 : : }
218 : : }
219 : : }
220 : :
221 : 3 : *size = max_size;
222 : 3 : return max_elem;
223 : : }
224 : :
225 : : /*
226 : : * Main function to allocate a block of memory from the heap.
227 : : * Called with the heap lock held: scan the free list for a suitable
228 : : * element. If the scan fails, the caller expands the heap with a new
229 : : * memseg and retries the scan, which should then succeed.
230 : : */
231 : : static void *
232 : 98922 : heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
233 : : unsigned int flags, size_t align, size_t bound, bool contig)
234 : : {
235 : : struct malloc_elem *elem;
236 : : size_t user_size = size;
237 : :
238 : 98922 : size = RTE_CACHE_LINE_ROUNDUP(size);
239 : 98922 : align = RTE_CACHE_LINE_ROUNDUP(align);
240 : :
241 : : /* roundup might cause an overflow */
242 [ + - ]: 98922 : if (size == 0)
243 : : return NULL;
244 : 98922 : elem = find_suitable_element(heap, size, flags, align, bound, contig);
245 [ + + ]: 98922 : if (elem != NULL) {
246 : 97699 : elem = malloc_elem_alloc(elem, size, align, bound, contig);
247 : :
248 : : /* increase heap's count of allocated elements */
249 : 97699 : heap->alloc_count++;
250 : :
251 : : asan_set_redzone(elem, user_size);
252 : : }
253 : :
254 [ + + ]: 98922 : return elem == NULL ? NULL : (void *)(&elem[1]);
255 : : }
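A minimal standalone sketch of the size normalization above, assuming a 64-byte cache line; note how a near-SIZE_MAX request wraps around to 0, which the size == 0 check catches:

	#include <stdint.h>
	#include <stdio.h>
	#include <rte_common.h>

	int
	main(void)
	{
		/* 100 rounds up to the next cache line multiple */
		printf("%zu\n", (size_t)RTE_CACHE_LINE_ROUNDUP(100));      /* 128 */
		/* overflowing requests wrap around to 0 */
		printf("%zu\n", (size_t)RTE_CACHE_LINE_ROUNDUP(SIZE_MAX)); /* 0 */
		return 0;
	}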
256 : :
257 : : static void *
258 : 3 : heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
259 : : unsigned int flags, size_t align, bool contig)
260 : : {
261 : : struct malloc_elem *elem;
262 : : size_t size;
263 : :
264 : 3 : align = RTE_CACHE_LINE_ROUNDUP(align);
265 : :
266 : 3 : elem = find_biggest_element(heap, &size, flags, align, contig);
267 [ + - ]: 3 : if (elem != NULL) {
268 : 3 : elem = malloc_elem_alloc(elem, size, align, 0, contig);
269 : :
270 : : /* increase heap's count of allocated elements */
271 : 3 : heap->alloc_count++;
272 : :
273 : : asan_set_redzone(elem, size);
274 : : }
275 : :
276 [ + - ]: 3 : return elem == NULL ? NULL : (void *)(&elem[1]);
277 : : }
278 : :
279 : : /* this function is exposed in malloc_mp.h */
280 : : void
281 : 0 : rollback_expand_heap(struct rte_memseg **ms, int n_segs,
282 : : struct malloc_elem *elem, void *map_addr, size_t map_len)
283 : : {
284 [ # # ]: 0 : if (elem != NULL) {
285 : 0 : malloc_elem_free_list_remove(elem);
286 : 0 : malloc_elem_hide_region(elem, map_addr, map_len);
287 : : }
288 : :
289 : 0 : eal_memalloc_free_seg_bulk(ms, n_segs);
290 : 0 : }
291 : :
292 : : /* this function is exposed in malloc_mp.h */
293 : : struct malloc_elem *
294 : 1117 : alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
295 : : int socket, unsigned int flags, size_t align, size_t bound,
296 : : bool contig, struct rte_memseg **ms, int n_segs)
297 : : {
298 : 1117 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
299 : : struct rte_memseg_list *msl;
300 : : struct malloc_elem *elem = NULL;
301 : : size_t alloc_sz;
302 : : int allocd_pages, i;
303 : : bool dirty = false;
304 : : void *ret, *map_addr;
305 : :
306 : 1117 : alloc_sz = (size_t)pg_sz * n_segs;
307 : :
308 : : /* first, check if we're allowed to allocate this memory */
309 [ - + ]: 1117 : if (eal_memalloc_mem_alloc_validate(socket,
310 : 1117 : heap->total_size + alloc_sz) < 0) {
311 : 0 : EAL_LOG(DEBUG, "User has disallowed allocation");
312 : 0 : return NULL;
313 : : }
314 : :
315 : 1117 : allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
316 : : socket, true);
317 : :
318 : : /* make sure we've allocated our pages... */
319 [ + - ]: 1117 : if (allocd_pages < 0)
320 : : return NULL;
321 : :
322 : 1117 : map_addr = ms[0]->addr;
323 : 1117 : msl = rte_mem_virt2memseg_list(map_addr);
324 : :
325 : : /* check if we wanted contiguous memory but didn't get it */
326 [ - + - - ]: 1117 : if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
327 : 0 : EAL_LOG(DEBUG, "%s(): couldn't allocate physically contiguous space",
328 : : __func__);
329 : 0 : goto fail;
330 : : }
331 : :
332 : : /*
333 : : * Once we have all the memseg lists configured, if there is a dma mask
334 : : * set, check iova addresses are not out of range. Otherwise the device
335 : : * setting the dma mask could have problems with the mapped memory.
336 : : *
337 : : * There are two situations when this can happen:
338 : : * 1) memory initialization
339 : : * 2) dynamic memory allocation
340 : : *
341 : : * For 1), an error when checking the DMA mask means the app cannot
342 : : * be executed. For 2), it means the new memory cannot be added.
343 : : */
344 [ - + - - ]: 1117 : if (mcfg->dma_maskbits &&
345 : 0 : rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) {
346 : : /*
347 : : * Currently this can only happen if IOMMU is enabled
348 : : * and the address width supported by the IOMMU hw is
349 : : * not enough for using the memory mapped IOVAs.
350 : : *
351 : : * If IOVA is VA, advise trying '--iova-mode pa', which
352 : : * can solve some situations where IOVA as VA is not
353 : : * really needed.
354 : : */
355 : 0 : EAL_LOG(ERR,
356 : : "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask",
357 : : __func__);
358 : :
359 : : /*
360 : : * If IOVA is VA and it is possible to run with IOVA PA
361 : : * (because the user is root), give advice for solving
362 : : * the problem.
363 : : */
364 [ # # # # ]: 0 : if ((rte_eal_iova_mode() == RTE_IOVA_VA) &&
365 : 0 : rte_eal_using_phys_addrs())
366 : 0 : EAL_LOG(ERR,
367 : : "%s(): Please try initializing EAL with --iova-mode=pa parameter",
368 : : __func__);
369 : 0 : goto fail;
370 : : }
371 : :
372 : : /* Element is dirty if it contains at least one dirty page. */
373 [ + + ]: 2683 : for (i = 0; i < allocd_pages; i++)
374 : 1566 : dirty |= ms[i]->flags & RTE_MEMSEG_FLAG_DIRTY;
375 : :
376 : : /* add newly minted memsegs to malloc heap */
377 : 1117 : elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz, dirty);
378 : :
379 : : /* try once more, as now we have allocated new memory */
380 : 1117 : ret = find_suitable_element(heap, elt_size, flags, align, bound,
381 : : contig);
382 : :
383 [ - + ]: 1117 : if (ret == NULL)
384 : 0 : goto fail;
385 : :
386 : : return elem;
387 : :
388 : 0 : fail:
389 : 0 : rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
390 : 0 : return NULL;
391 : : }
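The same check is available to drivers through the public API; a sketch assuming a device limited to 40 address bits:

	#include <rte_memory.h>

	static int
	check_device_dma_limits(void)
	{
		/* returns non-zero if any mapped IOVA exceeds 40 bits */
		if (rte_mem_check_dma_mask(40) != 0)
			return -1; /* consider retrying EAL with --iova-mode=pa */
		return 0;
	}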
392 : :
393 : : static int
394 : 1116 : try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
395 : : size_t elt_size, int socket, unsigned int flags, size_t align,
396 : : size_t bound, bool contig)
397 : : {
398 : : struct malloc_elem *elem;
399 : : struct rte_memseg **ms;
400 : : void *map_addr;
401 : : size_t alloc_sz;
402 : : int n_segs;
403 : : bool callback_triggered = false;
404 : :
405 : 1116 : alloc_sz = RTE_ALIGN_CEIL(RTE_ALIGN_CEIL(elt_size, align) +
406 : : MALLOC_ELEM_OVERHEAD, pg_sz);
407 : 1116 : n_segs = alloc_sz / pg_sz;
408 : :
409 : : /* we can't know in advance how many pages we'll need, so we malloc */
410 : 1116 : ms = malloc(sizeof(*ms) * n_segs);
411 [ + - ]: 1116 : if (ms == NULL)
412 : : return -1;
413 : : memset(ms, 0, sizeof(*ms) * n_segs);
414 : :
415 : 1116 : elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
416 : : bound, contig, ms, n_segs);
417 : :
418 [ - + ]: 1116 : if (elem == NULL)
419 : 0 : goto free_ms;
420 : :
421 : 1116 : map_addr = ms[0]->addr;
422 : :
423 : : /* notify user about changes in memory map */
424 : 1116 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
425 : :
426 : : /* notify other processes that this has happened */
427 [ - + ]: 1116 : if (request_sync()) {
428 : : /* we couldn't ensure all processes have mapped memory,
429 : : * so free it back and notify everyone that it's been
430 : : * freed back.
431 : : *
432 : : * technically, we could've avoided adding memory addresses to
433 : : * the map, but that would've led to inconsistent behavior
434 : : * between primary and secondary processes, as those get
435 : : * callbacks during sync. therefore, force primary process to
436 : : * do alloc-and-rollback syncs as well.
437 : : */
438 : : callback_triggered = true;
439 : 0 : goto free_elem;
440 : : }
441 : 1116 : heap->total_size += alloc_sz;
442 : :
443 : 1116 : EAL_LOG(DEBUG, "Heap on socket %d was expanded by %zdMB",
444 : : socket, alloc_sz >> 20ULL);
445 : :
446 : 1116 : free(ms);
447 : :
448 : 1116 : return 0;
449 : :
450 : : free_elem:
451 : : if (callback_triggered)
452 : 0 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
453 : : map_addr, alloc_sz);
454 : :
455 : 0 : rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
456 : :
457 : 0 : request_sync();
458 : 0 : free_ms:
459 : 0 : free(ms);
460 : :
461 : 0 : return -1;
462 : : }
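A worked example of the sizing above, with values assumed purely for illustration:

	/*
	 * Assumed: elt_size = 3 MB, align = 64, MALLOC_ELEM_OVERHEAD = 128 B,
	 * pg_sz = 2 MB.
	 *
	 *   RTE_ALIGN_CEIL(3 MB, 64)       = 3 MB (already a multiple of 64)
	 *   3 MB + 128 B                   = 3145856 B
	 *   RTE_ALIGN_CEIL(3145856, 2 MB)  = 4 MB      (alloc_sz)
	 *   alloc_sz / pg_sz               = 2         (n_segs)
	 */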
463 : :
464 : : static int
465 : 1 : try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
466 : : size_t elt_size, int socket, unsigned int flags, size_t align,
467 : : size_t bound, bool contig)
468 : : {
469 : 1 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
470 : : struct malloc_mp_req req;
471 : : int req_result;
472 : :
473 : : memset(&req, 0, sizeof(req));
474 : :
475 : : req.t = REQ_TYPE_ALLOC;
476 : 1 : req.alloc_req.align = align;
477 : 1 : req.alloc_req.bound = bound;
478 : 1 : req.alloc_req.contig = contig;
479 : 1 : req.alloc_req.flags = flags;
480 : 1 : req.alloc_req.elt_size = elt_size;
481 : 1 : req.alloc_req.page_sz = pg_sz;
482 : 1 : req.alloc_req.socket = socket;
483 : 1 : req.alloc_req.malloc_heap_idx = heap - mcfg->malloc_heaps;
484 : :
485 : 1 : req_result = request_to_primary(&req);
486 : :
487 [ + - ]: 1 : if (req_result != 0)
488 : : return -1;
489 : :
490 [ - + ]: 1 : if (req.result != REQ_RESULT_SUCCESS)
491 : 0 : return -1;
492 : :
493 : : return 0;
494 : : }
495 : :
496 : : static int
497 : 1117 : try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
498 : : int socket, unsigned int flags, size_t align, size_t bound,
499 : : bool contig)
500 : : {
501 : : int ret;
502 : :
503 : 1117 : rte_mcfg_mem_write_lock();
504 : :
505 [ + + ]: 1117 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
506 : 1116 : ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
507 : : flags, align, bound, contig);
508 : : } else {
509 : 1 : ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
510 : : flags, align, bound, contig);
511 : : }
512 : :
513 : 1117 : rte_mcfg_mem_write_unlock();
514 : 1117 : return ret;
515 : : }
516 : :
517 : : static int
518 : 4868 : compare_pagesz(const void *a, const void *b)
519 : : {
520 : : const struct rte_memseg_list * const*mpa = a;
521 : : const struct rte_memseg_list * const*mpb = b;
522 : 4868 : const struct rte_memseg_list *msla = *mpa;
523 : 4868 : const struct rte_memseg_list *mslb = *mpb;
524 : 4868 : uint64_t pg_sz_a = msla->page_sz;
525 : 4868 : uint64_t pg_sz_b = mslb->page_sz;
526 : :
527 [ + - ]: 4868 : if (pg_sz_a < pg_sz_b)
528 : : return -1;
529 [ - + ]: 4868 : if (pg_sz_a > pg_sz_b)
530 : 0 : return 1;
531 : : return 0;
532 : : }
533 : :
534 : : static int
535 : 1219 : alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
536 : : unsigned int flags, size_t align, size_t bound, bool contig)
537 : : {
538 : 1219 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
539 : : struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
540 : : struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
541 : : uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
542 : : uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
543 : : uint64_t prev_pg_sz;
544 : : int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
545 : 1219 : bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
546 : 1219 : unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
547 : : void *ret;
548 : :
549 : : memset(requested_msls, 0, sizeof(requested_msls));
550 : : memset(other_msls, 0, sizeof(other_msls));
551 : : memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
552 : : memset(other_pg_sz, 0, sizeof(other_pg_sz));
553 : :
554 : : /*
555 : : * go through memseg list and take note of all the page sizes available,
556 : : * and if any of them were specifically requested by the user.
557 : : */
558 : : n_requested_msls = 0;
559 : : n_other_msls = 0;
560 [ + + ]: 157251 : for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
561 : 156032 : struct rte_memseg_list *msl = &mcfg->memsegs[i];
562 : :
563 [ + + ]: 156032 : if (msl->socket_id != socket)
564 : 5596 : continue;
565 : :
566 [ + + ]: 150436 : if (msl->base_va == NULL)
567 : 145560 : continue;
568 : :
569 : : /* if pages of specific size were requested */
570 [ + + - + ]: 4876 : if (size_flags != 0 && check_hugepage_sz(size_flags,
571 : : msl->page_sz))
572 : 0 : requested_msls[n_requested_msls++] = msl;
573 [ + + ]: 4876 : else if (size_flags == 0 || size_hint)
574 : 4868 : other_msls[n_other_msls++] = msl;
575 : : }
576 : :
577 : : /* sort the lists, smallest first */
578 : 1219 : qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
579 : : compare_pagesz);
580 : 1219 : qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
581 : : compare_pagesz);
582 : :
583 : : /* now, extract page sizes we are supposed to try */
584 : : prev_pg_sz = 0;
585 : : n_requested_pg_sz = 0;
586 [ - + ]: 1219 : for (i = 0; i < n_requested_msls; i++) {
587 : 0 : uint64_t pg_sz = requested_msls[i]->page_sz;
588 : :
589 [ # # ]: 0 : if (prev_pg_sz != pg_sz) {
590 : 0 : requested_pg_sz[n_requested_pg_sz++] = pg_sz;
591 : : prev_pg_sz = pg_sz;
592 : : }
593 : : }
594 : : prev_pg_sz = 0;
595 : : n_other_pg_sz = 0;
596 [ + + ]: 6087 : for (i = 0; i < n_other_msls; i++) {
597 : 4868 : uint64_t pg_sz = other_msls[i]->page_sz;
598 : :
599 [ + + ]: 4868 : if (prev_pg_sz != pg_sz) {
600 : 1217 : other_pg_sz[n_other_pg_sz++] = pg_sz;
601 : : prev_pg_sz = pg_sz;
602 : : }
603 : : }
604 : :
605 : : /* finally, try allocating memory of specified page sizes, starting from
606 : : * the smallest sizes
607 : : */
608 [ - + ]: 1219 : for (i = 0; i < n_requested_pg_sz; i++) {
609 : 0 : uint64_t pg_sz = requested_pg_sz[i];
610 : :
611 : : /*
612 : : * do not pass the size hint here, as user expects other page
613 : : * sizes first, before resorting to best effort allocation.
614 : : */
615 [ # # ]: 0 : if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
616 : : align, bound, contig))
617 : : return 0;
618 : : }
619 [ + + ]: 1219 : if (n_other_pg_sz == 0)
620 : : return -1;
621 : :
622 : : /* now, check if we can reserve anything with size hint */
623 : 1217 : ret = find_suitable_element(heap, size, flags, align, bound, contig);
624 [ + + ]: 1217 : if (ret != NULL)
625 : : return 0;
626 : :
627 : : /*
628 : : * we still couldn't reserve memory, so try expanding heap with other
629 : : * page sizes, if there are any
630 : : */
631 [ + - ]: 1117 : for (i = 0; i < n_other_pg_sz; i++) {
632 : 1117 : uint64_t pg_sz = other_pg_sz[i];
633 : :
634 [ - + ]: 1117 : if (!try_expand_heap(heap, pg_sz, size, socket, flags,
635 : : align, bound, contig))
636 : : return 0;
637 : : }
638 : : return -1;
639 : : }
640 : :
641 : : /* this will try lower page sizes first */
642 : : static void *
643 : 97692 : malloc_heap_alloc_on_heap_id(const char *type, size_t size,
644 : : unsigned int heap_id, unsigned int flags, size_t align,
645 : : size_t bound, bool contig)
646 : : {
647 : 97692 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
648 : 97689 : struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
649 : 97689 : unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
650 : : int socket_id;
651 : : void *ret;
652 : : const struct internal_config *internal_conf =
653 : 97689 : eal_get_internal_configuration();
654 : :
655 : 97687 : rte_spinlock_lock(&(heap->lock));
656 : :
657 : : align = align == 0 ? 1 : align;
658 : :
659 : : /* for legacy mode, try once and with all flags */
660 [ + + ]: 97705 : if (internal_conf->legacy_mem) {
661 : 13438 : ret = heap_alloc(heap, type, size, flags, align, bound, contig);
662 : 13438 : goto alloc_unlock;
663 : : }
664 : :
665 : : /*
666 : : * we do not pass the size hint here, because even if allocation fails,
667 : : * we may still be able to allocate memory from appropriate page sizes,
668 : : * we just need to request more memory first.
669 : : */
670 : :
671 : 84267 : socket_id = rte_socket_id_by_idx(heap_id);
672 : : /*
673 : : * if socket ID is negative, we cannot find a socket ID for this heap -
674 : : * which means it's an external heap. those can have unexpected page
675 : : * sizes, so if the user asked to allocate from there - assume user
676 : : * knows what they're doing, and allow allocating from there with any
677 : : * page size flags.
678 : : */
679 [ + + ]: 84267 : if (socket_id < 0)
680 : 10 : size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
681 : :
682 : 84267 : ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
683 [ + + ]: 84267 : if (ret != NULL)
684 : 83048 : goto alloc_unlock;
685 : :
686 : : /* if socket ID is invalid, this is an external heap */
687 [ - + ]: 1219 : if (socket_id < 0)
688 : 0 : goto alloc_unlock;
689 : :
690 [ + + ]: 1219 : if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
691 : : bound, contig)) {
692 : 1217 : ret = heap_alloc(heap, type, size, flags, align, bound, contig);
693 : :
694 : : /* this should have succeeded */
695 [ + - ]: 1217 : if (ret == NULL)
696 : 0 : EAL_LOG(ERR, "Error allocating from heap");
697 : : }
698 : 97705 : alloc_unlock:
699 : : rte_spinlock_unlock(&(heap->lock));
700 : 97705 : return ret;
701 : : }
702 : :
703 : : static unsigned int
704 : 97272 : malloc_get_numa_socket(void)
705 : : {
706 : 97272 : const struct internal_config *conf = eal_get_internal_configuration();
707 : 97272 : unsigned int socket_id = rte_socket_id();
708 : : unsigned int idx;
709 : :
710 [ + + ]: 97272 : if (socket_id != (unsigned int)SOCKET_ID_ANY)
711 : : return socket_id;
712 : :
713 : : /* for control threads, return first socket where memory is available */
714 [ + + ]: 18 : for (idx = 0; idx < rte_socket_count(); idx++) {
715 : 12 : socket_id = rte_socket_id_by_idx(idx);
716 [ - + ]: 12 : if (conf->socket_mem[socket_id] != 0)
717 : 0 : return socket_id;
718 : : }
719 : : /* We couldn't quickly find a NUMA node where memory was available,
720 : : * so fall back to using main lcore socket ID.
721 : : */
722 : 6 : socket_id = rte_lcore_to_socket_id(rte_get_main_lcore());
723 : : /* Main lcore socket ID may be SOCKET_ID_ANY
724 : : * when main lcore thread is affinitized to multiple NUMA nodes.
725 : : */
726 [ - + ]: 6 : if (socket_id != (unsigned int)SOCKET_ID_ANY)
727 : : return socket_id;
728 : : /* Failed to find meaningful socket ID, so use the first one available. */
729 : 0 : return rte_socket_id_by_idx(0);
730 : : }
731 : :
732 : : void *
733 : 97721 : malloc_heap_alloc(const char *type, size_t size, int socket_arg,
734 : : unsigned int flags, size_t align, size_t bound, bool contig)
735 : : {
736 : : int socket, heap_id, i;
737 : : void *ret;
738 : :
739 : : /* return NULL if size is 0 or alignment is not power-of-2 */
740 [ + - + - + - ] : 97721 : if (size == 0 || (align && !rte_is_power_of_2(align)))
741 : : return NULL;
742 : :
743 [ + + + + ]: 97721 : if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
744 : : socket_arg = SOCKET_ID_ANY;
745 : :
746 [ + + ]: 84298 : if (socket_arg == SOCKET_ID_ANY)
747 : 97271 : socket = malloc_get_numa_socket();
748 : : else
749 : : socket = socket_arg;
750 : :
751 : : /* turn socket ID into heap ID */
752 : 97728 : heap_id = malloc_socket_to_heap_id(socket);
753 : : /* if heap id is negative, socket ID was invalid */
754 [ + + ]: 97721 : if (heap_id < 0)
755 : : return NULL;
756 : :
757 : 97690 : ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
758 : : bound, contig);
759 [ + + ]: 97702 : if (ret != NULL || socket_arg != SOCKET_ID_ANY)
760 : : return ret;
761 : :
762 : : /* try other heaps. we are only iterating through native DPDK sockets,
763 : : * so external heaps won't be included.
764 : : */
765 [ + + ]: 9 : for (i = 0; i < (int) rte_socket_count(); i++) {
766 [ + + ]: 6 : if (i == heap_id)
767 : 3 : continue;
768 : 3 : ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
769 : : bound, contig);
770 [ - + ]: 3 : if (ret != NULL)
771 : 0 : return ret;
772 : : }
773 : : return NULL;
774 : : }
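A typical public entry into the path above: rte_malloc_socket() maps the socket to a heap ID and lands in malloc_heap_alloc(); passing SOCKET_ID_ANY enables the cross-heap fallback loop. A minimal sketch (name and size are illustrative):

	#include <rte_lcore.h>
	#include <rte_malloc.h>

	static void *
	alloc_local_then_any(size_t len)
	{
		/* prefer the caller's NUMA node... */
		void *buf = rte_malloc_socket("example", len, 0, rte_socket_id());

		/* ...then fall back to any socket */
		if (buf == NULL)
			buf = rte_malloc("example", len, 0); /* SOCKET_ID_ANY */
		return buf;
	}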
775 : :
776 : : static void *
777 : 3 : heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
778 : : unsigned int flags, size_t align, bool contig)
779 : : {
780 : 3 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
781 : 3 : struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
782 : : void *ret;
783 : :
784 : 3 : rte_spinlock_lock(&(heap->lock));
785 : :
786 : : align = align == 0 ? 1 : align;
787 : :
788 : 3 : ret = heap_alloc_biggest(heap, type, flags, align, contig);
789 : :
790 : : rte_spinlock_unlock(&(heap->lock));
791 : :
792 : 3 : return ret;
793 : : }
794 : :
795 : : void *
796 : 3 : malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
797 : : size_t align, bool contig)
798 : : {
799 : : int socket, i, cur_socket, heap_id;
800 : : void *ret;
801 : :
802 : : /* return NULL if align is not power-of-2 */
803 [ + - + - ]: 3 : if ((align && !rte_is_power_of_2(align)))
804 : : return NULL;
805 : :
806 [ + - ]: 3 : if (!rte_eal_has_hugepages())
807 : : socket_arg = SOCKET_ID_ANY;
808 : :
809 [ + + ]: 3 : if (socket_arg == SOCKET_ID_ANY)
810 : 1 : socket = malloc_get_numa_socket();
811 : : else
812 : : socket = socket_arg;
813 : :
814 : : /* turn socket ID into heap ID */
815 : 3 : heap_id = malloc_socket_to_heap_id(socket);
816 : : /* if heap id is negative, socket ID was invalid */
817 [ + - ]: 3 : if (heap_id < 0)
818 : : return NULL;
819 : :
820 : 3 : ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
821 : : contig);
822 [ - + ]: 3 : if (ret != NULL || socket_arg != SOCKET_ID_ANY)
823 : : return ret;
824 : :
825 : : /* try other heaps */
826 [ # # ]: 0 : for (i = 0; i < (int) rte_socket_count(); i++) {
827 : 0 : cur_socket = rte_socket_id_by_idx(i);
828 [ # # ]: 0 : if (cur_socket == socket)
829 : 0 : continue;
830 : 0 : ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
831 : : contig);
832 [ # # ]: 0 : if (ret != NULL)
833 : 0 : return ret;
834 : : }
835 : : return NULL;
836 : : }
837 : :
838 : : /* this function is exposed in malloc_mp.h */
839 : : int
840 : 1127 : malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
841 : : {
842 : : int n_segs, seg_idx, max_seg_idx;
843 : : struct rte_memseg_list *msl;
844 : : size_t page_sz;
845 : :
846 : 1127 : msl = rte_mem_virt2memseg_list(aligned_start);
847 [ + - ]: 1127 : if (msl == NULL)
848 : : return -1;
849 : :
850 : 1127 : page_sz = (size_t)msl->page_sz;
851 : 1127 : n_segs = aligned_len / page_sz;
852 : 1127 : seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
853 : 1127 : max_seg_idx = seg_idx + n_segs;
854 : :
855 [ + + ]: 2766 : for (; seg_idx < max_seg_idx; seg_idx++) {
856 : : struct rte_memseg *ms;
857 : :
858 : 1639 : ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
859 : 1639 : eal_memalloc_free_seg(ms);
860 : : }
861 : : return 0;
862 : : }
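A worked example of the index arithmetic above, with assumed addresses:

	/*
	 * Assumed: base_va = 0x100000000, aligned_start = 0x100400000,
	 * page_sz = 2 MB (0x200000), aligned_len = 6 MB.
	 *
	 *   seg_idx     = (0x100400000 - 0x100000000) / 0x200000 = 2
	 *   n_segs      = 0x600000 / 0x200000                    = 3
	 *   max_seg_idx = 2 + 3                                  = 5
	 *
	 * so memsegs 2, 3 and 4 in this list are released back to the system.
	 */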
863 : :
864 : : int
865 : 89323 : malloc_heap_free(struct malloc_elem *elem)
866 : : {
867 : : struct malloc_heap *heap;
868 : : void *start, *aligned_start, *end, *aligned_end;
869 : : size_t len, aligned_len, page_sz;
870 : : struct rte_memseg_list *msl;
871 : : unsigned int i, n_segs, before_space, after_space;
872 : : int ret;
873 : : bool unmapped = false;
874 : : const struct internal_config *internal_conf =
875 : 89323 : eal_get_internal_configuration();
876 : :
877 [ + - + - ]: 89323 : if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
878 : : return -1;
879 : :
880 : : asan_clear_redzone(elem);
881 : :
882 : : /* elem may be merged with previous element, so keep heap address */
883 : 89323 : heap = elem->heap;
884 : 89323 : msl = elem->msl;
885 : 89323 : page_sz = (size_t)msl->page_sz;
886 : :
887 : 89323 : rte_spinlock_lock(&(heap->lock));
888 : :
889 : : void *asan_ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN + elem->pad);
890 : : size_t asan_data_len = elem->size - MALLOC_ELEM_OVERHEAD - elem->pad;
891 : :
892 : : /* mark element as free */
893 : 89329 : elem->state = ELEM_FREE;
894 : :
895 : 89329 : elem = malloc_elem_free(elem);
896 : :
897 : : /* anything after this is a bonus */
898 : : ret = 0;
899 : :
900 : : /* ...which we cannot take advantage of in legacy mode, or if this is
901 : : * an externally allocated segment.
902 : : */
903 [ + + + + ]: 89329 : if (internal_conf->legacy_mem || (msl->external > 0))
904 : 5110 : goto free_unlock;
905 : :
906 : : /* check if we can free any memory back to the system */
907 [ + + ]: 84219 : if (elem->size < page_sz)
908 : 83074 : goto free_unlock;
909 : :
910 : : /* if user requested to match allocations, the sizes must match - if not,
911 : : * we will defer freeing these hugepages until the entire original allocation
912 : : * can be freed
913 : : */
914 [ - + - - ]: 1145 : if (internal_conf->match_allocations && elem->size != elem->orig_size)
915 : 0 : goto free_unlock;
916 : :
917 : : /* probably, but let's make sure, as we may not be using up a full page */
918 : : start = elem;
919 : : len = elem->size;
920 : 1145 : aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
921 : 1145 : end = RTE_PTR_ADD(elem, len);
922 : 1145 : aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);
923 : :
924 : 1145 : aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
925 : :
926 : : /* can't free anything */
927 [ + + ]: 1145 : if (aligned_len < page_sz)
928 : 7 : goto free_unlock;
929 : :
930 : : /* we can free something. however, some of these pages may be marked as
931 : : * unfreeable, so check for that as well
932 : : */
933 : 1138 : n_segs = aligned_len / page_sz;
934 [ + + ]: 2866 : for (i = 0; i < n_segs; i++) {
935 : : const struct rte_memseg *tmp =
936 : 1728 : rte_mem_virt2memseg(aligned_start, msl);
937 : :
938 [ + + ]: 1728 : if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
939 : : /* this is an unfreeable segment, so move start */
940 : 89 : aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
941 : : }
942 : : }
943 : :
944 : : /* recalculate length and number of segments */
945 : 1138 : aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
946 : 1138 : n_segs = aligned_len / page_sz;
947 : :
948 : : /* check if we can still free some pages */
949 [ + + ]: 1138 : if (n_segs == 0)
950 : 11 : goto free_unlock;
951 : :
952 : : /* We're not done yet. We also have to check if by freeing space we will
953 : : * be leaving free elements that are too small to store new elements.
954 : : * Check if we have enough space in the beginning and at the end, or if
955 : : * start/end are exactly page aligned.
956 : : */
957 : 1127 : before_space = RTE_PTR_DIFF(aligned_start, elem);
958 : 1127 : after_space = RTE_PTR_DIFF(end, aligned_end);
959 [ + + - + ]: 1127 : if (before_space != 0 &&
960 : : before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
961 : : /* There is not enough space before start, but we may be able to
962 : : * move the start forward by one page.
963 : : */
964 [ # # ]: 0 : if (n_segs == 1)
965 : 0 : goto free_unlock;
966 : :
967 : : /* move start */
968 : 0 : aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
969 : 0 : aligned_len -= page_sz;
970 : 0 : n_segs--;
971 : : }
972 [ + + - + ]: 1127 : if (after_space != 0 && after_space <
973 : : MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
974 : : /* There is not enough space after end, but we may be able to
975 : : * move the end backwards by one page.
976 : : */
977 [ # # ]: 0 : if (n_segs == 1)
978 : 0 : goto free_unlock;
979 : :
980 : : /* move end */
981 : : aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
982 : 0 : aligned_len -= page_sz;
983 : : n_segs--;
984 : : }
985 : :
986 : : /* now we can finally free us some pages */
987 : :
988 : 1127 : rte_mcfg_mem_write_lock();
989 : :
990 : : /*
991 : : * we allow secondary processes to clear the heap of this allocated
992 : : * memory because it is safe to do so, as even if notifications about
993 : : * unmapped pages don't make it to other processes, the heap is shared
994 : : * across all processes and will become empty of this memory anyway,
995 : : * and nothing can allocate it back unless the primary process can
996 : : * deliver the allocation message to every single running process.
997 : : */
998 : :
999 : 1127 : malloc_elem_free_list_remove(elem);
1000 : :
1001 : 1127 : malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);
1002 : :
1003 : 1127 : heap->total_size -= aligned_len;
1004 : :
1005 [ + - ]: 1127 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1006 : : /* notify user about changes in memory map */
1007 : 1127 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
1008 : : aligned_start, aligned_len);
1009 : :
1010 : : /* don't care if any of this fails */
1011 : 1127 : malloc_heap_free_pages(aligned_start, aligned_len);
1012 : :
1013 : 1127 : request_sync();
1014 : : } else {
1015 : : struct malloc_mp_req req;
1016 : :
1017 : : memset(&req, 0, sizeof(req));
1018 : :
1019 : 0 : req.t = REQ_TYPE_FREE;
1020 : 0 : req.free_req.addr = aligned_start;
1021 : 0 : req.free_req.len = aligned_len;
1022 : :
1023 : : /*
1024 : : * we request primary to deallocate pages, but we don't do it
1025 : : * in this thread. instead, we notify primary that we would like
1026 : : * to deallocate pages, and this process will receive another
1027 : : * request (in parallel) that will do it for us on another
1028 : : * thread.
1029 : : *
1030 : : * we also don't really care if this succeeds - the data is
1031 : : * already removed from the heap, so it is, for all intents and
1032 : : * purposes, hidden from the rest of DPDK even if some other
1033 : : * process (including this one) may have these pages mapped.
1034 : : *
1035 : : * notifications about deallocated memory happen during sync.
1036 : : */
1037 : 0 : request_to_primary(&req);
1038 : : }
1039 : :
1040 : : /* we didn't exit early, meaning we have unmapped some pages */
1041 : : unmapped = true;
1042 : :
1043 : 1127 : EAL_LOG(DEBUG, "Heap on socket %d was shrunk by %zdMB",
1044 : : msl->socket_id, aligned_len >> 20ULL);
1045 : :
1046 : 1127 : rte_mcfg_mem_write_unlock();
1047 : 89329 : free_unlock:
1048 : : asan_set_freezone(asan_ptr, asan_data_len);
1049 : :
1050 : : /* if we unmapped some memory, we need to do additional work for ASan */
1051 : : if (unmapped) {
1052 : : void *asan_end = RTE_PTR_ADD(asan_ptr, asan_data_len);
1053 : : void *aligned_end = RTE_PTR_ADD(aligned_start, aligned_len);
1054 : : void *aligned_trailer = RTE_PTR_SUB(aligned_start,
1055 : : MALLOC_ELEM_TRAILER_LEN);
1056 : :
1057 : : /*
1058 : : * There was a memory area that was unmapped. This memory area
1059 : : * will have to be marked as available for ASan, because we will
1060 : : * want to use it next time it gets mapped again. The OS memory
1061 : : * protection should trigger a fault on access to these areas
1062 : : * anyway, so we are not giving up any protection.
1063 : : */
1064 : : asan_set_zone(aligned_start, aligned_len, 0x00);
1065 : :
1066 : : /*
1067 : : * ...however, when we unmap pages, we create new free elements
1068 : : * which might have been marked as "freed" with an earlier
1069 : : * `asan_set_freezone` call. So, if there is an area past the
1070 : : * unmapped space that was marked as freezone for ASan, we need
1071 : : * to mark the malloc header as available.
1072 : : */
1073 : : if (asan_end > aligned_end)
1074 : : asan_set_zone(aligned_end, MALLOC_ELEM_HEADER_LEN, 0x00);
1075 : :
1076 : : /* if there's space before unmapped memory, mark as available */
1077 : : if (asan_ptr < aligned_start)
1078 : : asan_set_zone(aligned_trailer, MALLOC_ELEM_TRAILER_LEN, 0x00);
1079 : : }
1080 : :
1081 : : rte_spinlock_unlock(&(heap->lock));
1082 : 89329 : return ret;
1083 : : }
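The page-trimming logic in malloc_heap_free() is easiest to see with concrete numbers; a worked example with assumed addresses on 2 MB pages:

	/*
	 * Assumed: elem (start) = 0x1001c0, len = 0x5fff00.
	 *
	 *   aligned_start = RTE_PTR_ALIGN_CEIL(0x1001c0, 0x200000)  = 0x200000
	 *   end           = 0x1001c0 + 0x5fff00                     = 0x7000c0
	 *   aligned_end   = RTE_PTR_ALIGN_FLOOR(0x7000c0, 0x200000) = 0x600000
	 *   aligned_len   = 0x600000 - 0x200000                     = 4 MB (2 pages)
	 *
	 * before_space (0x200000 - 0x1001c0) and after_space (0x7000c0 -
	 * 0x600000) must each be zero or large enough to hold a malloc
	 * element, otherwise the window shrinks by one page on that side.
	 */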
1084 : :
1085 : : int
1086 : 15 : malloc_heap_resize(struct malloc_elem *elem, size_t size)
1087 : : {
1088 : : int ret;
1089 : :
1090 [ + - + - ]: 15 : if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
1091 : : return -1;
1092 : :
1093 : 15 : rte_spinlock_lock(&(elem->heap->lock));
1094 : :
1095 : 15 : ret = malloc_elem_resize(elem, size);
1096 : :
1097 : 15 : rte_spinlock_unlock(&(elem->heap->lock));
1098 : :
1099 : 15 : return ret;
1100 : : }
1101 : :
1102 : : /*
1103 : : * Function to retrieve data for a given heap
1104 : : */
1105 : : int
1106 : 110 : malloc_heap_get_stats(struct malloc_heap *heap,
1107 : : struct rte_malloc_socket_stats *socket_stats)
1108 : : {
1109 : : size_t idx;
1110 : : struct malloc_elem *elem;
1111 : :
1112 : 110 : rte_spinlock_lock(&heap->lock);
1113 : :
1114 : : /* Initialise variables for heap */
1115 : 110 : socket_stats->free_count = 0;
1116 : 110 : socket_stats->heap_freesz_bytes = 0;
1117 : 110 : socket_stats->greatest_free_size = 0;
1118 : :
1119 : : /* Iterate through free list */
1120 [ + + ]: 1540 : for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
1121 : 1430 : for (elem = LIST_FIRST(&heap->free_head[idx]);
1122 [ + + ]: 1444 : !!elem; elem = LIST_NEXT(elem, free_list))
1123 : : {
1124 : 14 : socket_stats->free_count++;
1125 : 14 : socket_stats->heap_freesz_bytes += elem->size;
1126 [ + - ]: 14 : if (elem->size > socket_stats->greatest_free_size)
1127 : 14 : socket_stats->greatest_free_size = elem->size;
1128 : : }
1129 : : }
1130 : : /* Get stats on overall heap and allocated memory on this heap */
1131 : 110 : socket_stats->heap_totalsz_bytes = heap->total_size;
1132 : 110 : socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
1133 : 110 : socket_stats->heap_freesz_bytes);
1134 : 110 : socket_stats->alloc_count = heap->alloc_count;
1135 : :
1136 : : rte_spinlock_unlock(&heap->lock);
1137 : 110 : return 0;
1138 : : }
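Applications reach this through the public wrapper; a minimal sketch dumping per-socket heap stats:

	#include <stdio.h>
	#include <rte_malloc.h>

	static void
	print_socket_heap_stats(int socket)
	{
		struct rte_malloc_socket_stats stats;

		if (rte_malloc_get_socket_stats(socket, &stats) < 0)
			return;
		printf("socket %d: total=%zu free=%zu allocs=%u\n", socket,
			stats.heap_totalsz_bytes, stats.heap_freesz_bytes,
			stats.alloc_count);
	}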
1139 : :
1140 : : /*
1141 : : * Function to retrieve data for a given heap
1142 : : */
1143 : : void
1144 : 0 : malloc_heap_dump(struct malloc_heap *heap, FILE *f)
1145 : : {
1146 : : struct malloc_elem *elem;
1147 : :
1148 : 0 : rte_spinlock_lock(&heap->lock);
1149 : :
1150 : 0 : fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
1151 : 0 : fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
1152 : :
1153 : 0 : elem = heap->first;
1154 [ # # ]: 0 : while (elem) {
1155 : 0 : malloc_elem_dump(elem, f);
1156 : 0 : elem = elem->next;
1157 : : }
1158 : :
1159 : : rte_spinlock_unlock(&heap->lock);
1160 : 0 : }
1161 : :
1162 : : static int
1163 : 1 : destroy_elem(struct malloc_elem *elem, size_t len)
1164 : : {
1165 : 1 : struct malloc_heap *heap = elem->heap;
1166 : :
1167 : : /* notify all subscribers that a memory area is going to be removed */
1168 : 1 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);
1169 : :
1170 : : /* this element can be removed */
1171 : 1 : malloc_elem_free_list_remove(elem);
1172 : 1 : malloc_elem_hide_region(elem, elem, len);
1173 : :
1174 : 1 : heap->total_size -= len;
1175 : :
1176 : : memset(elem, 0, sizeof(*elem));
1177 : :
1178 : 1 : return 0;
1179 : : }
1180 : :
1181 : : struct rte_memseg_list *
1182 : 1 : malloc_heap_create_external_seg(void *va_addr, rte_iova_t iova_addrs[],
1183 : : unsigned int n_pages, size_t page_sz, const char *seg_name,
1184 : : unsigned int socket_id)
1185 : : {
1186 : 1 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1187 : : char fbarray_name[RTE_FBARRAY_NAME_LEN];
1188 : : struct rte_memseg_list *msl = NULL;
1189 : : struct rte_fbarray *arr;
1190 : 1 : size_t seg_len = n_pages * page_sz;
1191 : : unsigned int i;
1192 : :
1193 : : /* first, find a free memseg list */
1194 [ + - ]: 9 : for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
1195 : 9 : struct rte_memseg_list *tmp = &mcfg->memsegs[i];
1196 [ + + ]: 9 : if (tmp->base_va == NULL) {
1197 : : msl = tmp;
1198 : : break;
1199 : : }
1200 : : }
1201 [ - + ]: 1 : if (msl == NULL) {
1202 : 0 : EAL_LOG(ERR, "Couldn't find empty memseg list");
1203 : 0 : rte_errno = ENOSPC;
1204 : 0 : return NULL;
1205 : : }
1206 : :
1207 : : snprintf(fbarray_name, sizeof(fbarray_name), "%s_%p",
1208 : : seg_name, va_addr);
1209 : :
1210 : : /* create the backing fbarray */
1211 [ - + ]: 1 : if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
1212 : : sizeof(struct rte_memseg)) < 0) {
1213 : 0 : EAL_LOG(ERR, "Couldn't create fbarray backing the memseg list");
1214 : 0 : return NULL;
1215 : : }
1216 : : arr = &msl->memseg_arr;
1217 : :
1218 : : /* fbarray created, fill it up */
1219 [ + + ]: 3 : for (i = 0; i < n_pages; i++) {
1220 : : struct rte_memseg *ms;
1221 : :
1222 : 2 : rte_fbarray_set_used(arr, i);
1223 : 2 : ms = rte_fbarray_get(arr, i);
1224 : 2 : ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
1225 [ - + ]: 2 : ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
1226 : 2 : ms->hugepage_sz = page_sz;
1227 : 2 : ms->len = page_sz;
1228 : 2 : ms->nchannel = rte_memory_get_nchannel();
1229 : 2 : ms->nrank = rte_memory_get_nrank();
1230 : 2 : ms->socket_id = socket_id;
1231 : : }
1232 : :
1233 : : /* set up the memseg list */
1234 : 1 : msl->base_va = va_addr;
1235 : 1 : msl->page_sz = page_sz;
1236 : 1 : msl->socket_id = socket_id;
1237 : 1 : msl->len = seg_len;
1238 : 1 : msl->version = 0;
1239 : 1 : msl->external = 1;
1240 : :
1241 : 1 : return msl;
1242 : : }
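This internal function is reached through the public external-memory API; a minimal sketch, assuming caller-supplied anonymous memory (addr, len, pg_sz) and the hypothetical heap name "my_heap":

	#include <rte_malloc.h>

	static int
	attach_external_memory(void *addr, size_t len, size_t pg_sz)
	{
		if (rte_malloc_heap_create("my_heap") < 0)
			return -1;
		/* NULL iova_addrs: each memseg gets RTE_BAD_IOVA (see above) */
		if (rte_malloc_heap_memory_add("my_heap", addr, NULL,
				len / pg_sz, pg_sz) < 0)
			return -1;
		/* the heap's synthetic socket ID, usable with rte_malloc_socket() */
		return rte_malloc_heap_get_socket("my_heap");
	}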
1243 : :
1244 : : struct extseg_walk_arg {
1245 : : void *va_addr;
1246 : : size_t len;
1247 : : struct rte_memseg_list *msl;
1248 : : };
1249 : :
1250 : : static int
1251 : 9 : extseg_walk(const struct rte_memseg_list *msl, void *arg)
1252 : : {
1253 : 9 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1254 : : struct extseg_walk_arg *wa = arg;
1255 : :
1256 [ + + + - ]: 9 : if (msl->base_va == wa->va_addr && msl->len == wa->len) {
1257 : : unsigned int found_idx;
1258 : :
1259 : : /* msl is const */
1260 : 1 : found_idx = msl - mcfg->memsegs;
1261 : 1 : wa->msl = &mcfg->memsegs[found_idx];
1262 : 1 : return 1;
1263 : : }
1264 : : return 0;
1265 : : }
1266 : :
1267 : : struct rte_memseg_list *
1268 : 1 : malloc_heap_find_external_seg(void *va_addr, size_t len)
1269 : : {
1270 : : struct extseg_walk_arg wa;
1271 : : int res;
1272 : :
1273 : 1 : wa.va_addr = va_addr;
1274 : 1 : wa.len = len;
1275 : :
1276 : 1 : res = rte_memseg_list_walk_thread_unsafe(extseg_walk, &wa);
1277 : :
1278 [ - + ]: 1 : if (res != 1) {
1279 : : /* 0 means nothing was found, -1 shouldn't happen */
1280 [ # # ]: 0 : if (res == 0)
1281 : 0 : rte_errno = ENOENT;
1282 : 0 : return NULL;
1283 : : }
1284 : 1 : return wa.msl;
1285 : : }
1286 : :
1287 : : int
1288 : 1 : malloc_heap_destroy_external_seg(struct rte_memseg_list *msl)
1289 : : {
1290 : : /* destroy the fbarray backing this memory */
1291 [ + - ]: 1 : if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
1292 : : return -1;
1293 : :
1294 : : /* reset the memseg list */
1295 : : memset(msl, 0, sizeof(*msl));
1296 : :
1297 : 1 : return 0;
1298 : : }
1299 : :
1300 : : int
1301 : 1 : malloc_heap_add_external_memory(struct malloc_heap *heap,
1302 : : struct rte_memseg_list *msl)
1303 : : {
1304 : : /* erase contents of new memory */
1305 : 1 : memset(msl->base_va, 0, msl->len);
1306 : :
1307 : : /* now, add newly minted memory to the malloc heap */
1308 : 1 : malloc_heap_add_memory(heap, msl, msl->base_va, msl->len, false);
1309 : :
1310 : 1 : heap->total_size += msl->len;
1311 : :
1312 : : /* all done! */
1313 : 1 : EAL_LOG(DEBUG, "Added segment for heap %s starting at %p",
1314 : : heap->name, msl->base_va);
1315 : :
1316 : : /* notify all subscribers that a new memory area has been added */
1317 : 1 : eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
1318 : 1 : msl->base_va, msl->len);
1319 : :
1320 : 1 : return 0;
1321 : : }
1322 : :
1323 : : int
1324 : 1 : malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
1325 : : size_t len)
1326 : : {
1327 : 1 : struct malloc_elem *elem = heap->first;
1328 : :
1329 : : /* find element with specified va address */
1330 [ - + ]: 1 : while (elem != NULL && elem != va_addr) {
1331 : 0 : elem = elem->next;
1332 : : /* stop if we've blown past our VA */
1333 [ # # ]: 0 : if (elem > (struct malloc_elem *)va_addr) {
1334 : 0 : rte_errno = ENOENT;
1335 : 0 : return -1;
1336 : : }
1337 : : }
1338 : : /* check if element was found */
1339 [ + - - + ]: 1 : if (elem == NULL || elem->msl->len != len) {
1340 : 0 : rte_errno = ENOENT;
1341 : 0 : return -1;
1342 : : }
1343 : : /* if element's size is not equal to segment len, segment is busy */
1344 [ + - - + ]: 1 : if (elem->state == ELEM_BUSY || elem->size != len) {
1345 : 0 : rte_errno = EBUSY;
1346 : 0 : return -1;
1347 : : }
1348 : 1 : return destroy_elem(elem, len);
1349 : : }
1350 : :
1351 : : int
1352 : 1 : malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
1353 : : {
1354 : 1 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1355 : 1 : uint32_t next_socket_id = mcfg->next_socket_id;
1356 : :
1357 : : /* prevent overflow. did you really create 2 billion heaps??? */
1358 [ - + ]: 1 : if (next_socket_id > INT32_MAX) {
1359 : 0 : EAL_LOG(ERR, "Cannot assign new socket IDs");
1360 : 0 : rte_errno = ENOSPC;
1361 : 0 : return -1;
1362 : : }
1363 : :
1364 : : /* initialize empty heap */
1365 : 1 : heap->alloc_count = 0;
1366 : 1 : heap->first = NULL;
1367 : 1 : heap->last = NULL;
1368 : 1 : LIST_INIT(heap->free_head);
1369 : : rte_spinlock_init(&heap->lock);
1370 : 1 : heap->total_size = 0;
1371 : 1 : heap->socket_id = next_socket_id;
1372 : :
1373 : : /* we hold a global mem hotplug writelock, so it's safe to increment */
1374 : 1 : mcfg->next_socket_id++;
1375 : :
1376 : : /* set up name */
1377 : 1 : strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
1378 : 1 : return 0;
1379 : : }
1380 : :
1381 : : int
1382 : 1 : malloc_heap_destroy(struct malloc_heap *heap)
1383 : : {
1384 [ - + ]: 1 : if (heap->alloc_count != 0) {
1385 : 0 : EAL_LOG(ERR, "Heap is still in use");
1386 : 0 : rte_errno = EBUSY;
1387 : 0 : return -1;
1388 : : }
1389 [ + - - + ]: 1 : if (heap->first != NULL || heap->last != NULL) {
1390 : 0 : EAL_LOG(ERR, "Heap still contains memory segments");
1391 : 0 : rte_errno = EBUSY;
1392 : 0 : return -1;
1393 : : }
1394 [ - + ]: 1 : if (heap->total_size != 0)
1395 : 0 : EAL_LOG(ERR, "Total size not zero, heap is likely corrupt");
1396 : :
1397 : : /* Reset all of the heap but the (held) lock so the caller can release it. */
1398 : : RTE_BUILD_BUG_ON(offsetof(struct malloc_heap, lock) != 0);
1399 : 1 : memset(RTE_PTR_ADD(heap, sizeof(heap->lock)), 0,
1400 : : sizeof(*heap) - sizeof(heap->lock));
1401 : :
1402 : 1 : return 0;
1403 : : }
1404 : :
1405 : : int
1406 : 164 : rte_eal_malloc_heap_init(void)
1407 : : {
1408 : 164 : struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
1409 : : unsigned int i;
1410 : : const struct internal_config *internal_conf =
1411 : 164 : eal_get_internal_configuration();
1412 : :
1413 [ - + ]: 164 : if (internal_conf->match_allocations)
1414 : 0 : EAL_LOG(DEBUG, "Hugepages will be freed exactly as allocated.");
1415 : :
1416 [ + + ]: 164 : if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
1417 : : /* assign min socket ID to external heaps */
1418 : 144 : mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
1419 : :
1420 : : /* assign names to default DPDK heaps */
1421 [ + + ]: 432 : for (i = 0; i < rte_socket_count(); i++) {
1422 : : struct malloc_heap *heap = &mcfg->malloc_heaps[i];
1423 : : char heap_name[RTE_HEAP_NAME_MAX_LEN];
1424 : 288 : int socket_id = rte_socket_id_by_idx(i);
1425 : :
1426 : : snprintf(heap_name, sizeof(heap_name),
1427 : : "socket_%i", socket_id);
1428 : 288 : strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
1429 : 288 : heap->socket_id = socket_id;
1430 : : }
1431 : : }
1432 : :
1433 [ - + ]: 164 : if (register_mp_requests()) {
1434 : 0 : EAL_LOG(ERR, "Couldn't register malloc multiprocess actions");
1435 : 0 : return -1;
1436 : : }
1437 : :
1438 : : return 0;
1439 : : }
1440 : :
1441 : 164 : int rte_eal_malloc_heap_populate(void)
1442 : : {
1443 : : /* mem hotplug is unlocked here. it's safe for primary as no requests can
1444 : : * even come before primary itself is fully initialized, and secondaries
1445 : : * do not need to initialize the heap.
1446 : : */
1447 : :
1448 : : /* secondary process does not need to initialize anything */
1449 [ + + ]: 164 : if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1450 : : return 0;
1451 : :
1452 : : /* add all IOVA-contiguous areas to the heap */
1453 : 144 : return rte_memseg_contig_walk(malloc_add_seg, NULL);
1454 : : }
1455 : :
1456 : : void
1457 : 235 : rte_eal_malloc_heap_cleanup(void)
1458 : : {
1459 : 235 : unregister_mp_requests();
1460 : 235 : }
|