/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation.
 * Copyright(c) 2013 6WIND S.A.
 */

#include <inttypes.h>
#include <stdlib.h>
#include <string.h>

#include <rte_log.h>
#include <rte_string_fns.h>

#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "eal_memcfg.h"
#include "eal_private.h"

/** @file Functions common to EALs that support dynamic memory allocation. */

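/*
 * Create and reserve VA space for all memseg lists. One memory type is a
 * (page size, NUMA node) pair; each type receives one or more memseg
 * lists, subject to the RTE_MAX_MEM* build-time limits described below.
 */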
int
eal_dynmem_memseg_lists_init(void)
{
	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
	struct memtype {
		uint64_t page_sz;
		int socket_id;
	} *memtypes = NULL;
	int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
	struct rte_memseg_list *msl;
	uint64_t max_mem, max_mem_per_type;
	unsigned int max_seglists_per_type;
	unsigned int n_memtypes, cur_type;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	/* no-huge does not need this at all */
	if (internal_conf->no_hugetlbfs)
		return 0;

	/*
	 * figuring out the amount of memory we're going to have is a long and
	 * very involved process. the basic element we're operating with is a
	 * memory type, defined as a combination of NUMA node ID and page size
	 * (so that e.g. 2 sockets with 2 page sizes yield 4 memory types in
	 * total).
	 *
	 * deciding the amount of memory going towards each memory type is a
	 * balancing act between maximum segments per type, maximum memory per
	 * type, and the number of detected NUMA nodes. the goal is to make
	 * sure each memory type gets at least one memseg list.
	 *
	 * the total amount of memory is limited by RTE_MAX_MEM_MB.
	 *
	 * the total amount of memory per type is limited by either
	 * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
	 * of detected NUMA nodes. additionally, the maximum number of
	 * segments per type is limited by RTE_MAX_MEMSEG_PER_TYPE, because
	 * for smaller page sizes it can take hundreds of thousands of
	 * segments to reach the per-type memory limits above.
	 *
	 * additionally, each type may have multiple memseg lists associated
	 * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
	 * page sizes, or by RTE_MAX_MEMSEG_PER_LIST segments for smaller
	 * ones.
	 *
	 * the number of memseg lists per type is decided based on the above
	 * limits, also taking the number of detected NUMA nodes into account,
	 * to make sure that we don't run out of memseg lists before we
	 * populate all NUMA nodes with memory.
	 *
	 * we do this in three stages: first, we collect the number of types;
	 * then, we figure out memory constraints and populate the list of
	 * would-be memseg lists; finally, we allocate the memseg lists.
	 */
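
	/*
	 * For illustration (assuming the stock rte_config.h defaults, which
	 * builds may override): a machine with 2 NUMA nodes and 2 hugepage
	 * sizes has 4 memory types. With RTE_MAX_MEM_MB = 524288 and
	 * RTE_MAX_MEM_MB_PER_TYPE = 65536, each type is capped at
	 * min(64 GiB, 512 GiB / 4) = 64 GiB, and with
	 * RTE_MAX_MEMSEG_LISTS = 128 each type may occupy at most
	 * 128 / 4 = 32 memseg lists.
	 */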

	/* create space for mem types */
	n_memtypes = internal_conf->num_hugepage_sizes * rte_socket_count();
	memtypes = calloc(n_memtypes, sizeof(*memtypes));
	if (memtypes == NULL) {
		EAL_LOG(ERR, "Cannot allocate space for memory types");
		return -1;
	}

	/* populate mem types */
	cur_type = 0;
	for (hpi_idx = 0; hpi_idx < (int) internal_conf->num_hugepage_sizes;
			hpi_idx++) {
		struct hugepage_info *hpi;
		uint64_t hugepage_sz;

		hpi = &internal_conf->hugepage_info[hpi_idx];
		hugepage_sz = hpi->hugepage_sz;

		for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
			int socket_id = rte_socket_id_by_idx(i);

#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
			/* we can still sort pages by socket in legacy mode */
			if (!internal_conf->legacy_mem && socket_id > 0)
				break;
#endif
			memtypes[cur_type].page_sz = hugepage_sz;
			memtypes[cur_type].socket_id = socket_id;

			EAL_LOG(DEBUG, "Detected memory type: "
				"socket_id:%u hugepage_sz:%" PRIu64,
				socket_id, hugepage_sz);
		}
	}
	/* number of memtypes could have been lower due to no NUMA support */
	n_memtypes = cur_type;

	/* set up limits for types */
	max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
	max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
			max_mem / n_memtypes);
	/*
	 * limit maximum number of segment lists per type to ensure there's
	 * space for memseg lists for all NUMA nodes with all page sizes
	 */
	max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;

	if (max_seglists_per_type == 0) {
		EAL_LOG(ERR, "Cannot accommodate all memory types, please increase RTE_MAX_MEMSEG_LISTS");
		goto out;
	}

	/* go through all mem types and create segment lists */
	msl_idx = 0;
	for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
		unsigned int cur_seglist, n_seglists, n_segs;
		unsigned int max_segs_per_type, max_segs_per_list;
		struct memtype *type = &memtypes[cur_type];
		uint64_t max_mem_per_list, pagesz;
		int socket_id;

		pagesz = type->page_sz;
		socket_id = type->socket_id;

		/*
		 * we need to create segment lists for this type. we must take
		 * into account the following things:
		 *
		 * 1. total amount of memory we can use for this memory type
		 * 2. total amount of memory per memseg list allowed
		 * 3. number of segments needed to fit the amount of memory
		 * 4. number of segments allowed per type
		 * 5. number of segments allowed per memseg list
		 * 6. number of memseg lists we are allowed to take up
		 */
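
		/*
		 * Worked example of the computation below (again assuming
		 * stock defaults): for a 2 MiB page type capped at 64 GiB,
		 * max_segs_per_type = 32768 (coinciding with
		 * RTE_MAX_MEMSEG_PER_TYPE), max_segs_per_list = 8192
		 * (RTE_MAX_MEMSEG_PER_LIST), max_mem_per_list =
		 * min(8192 * 2 MiB, 32 GiB) = 16 GiB, n_segs = 8192, and
		 * n_seglists = min(32768 / 8192, 64 GiB / 16 GiB) = 4,
		 * i.e. four lists of 8192 two-megabyte segments.
		 */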

		/* calculate how many segments we will need in total */
		max_segs_per_type = max_mem_per_type / pagesz;
		/* limit number of segments to maximum allowed per type */
		max_segs_per_type = RTE_MIN(max_segs_per_type,
				(unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
		/* limit number of segments to maximum allowed per list */
		max_segs_per_list = RTE_MIN(max_segs_per_type,
				(unsigned int)RTE_MAX_MEMSEG_PER_LIST);

		/* calculate how much memory we can have per segment list */
		max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
				(uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);

		/* calculate how many segments each segment list will have */
		n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);

		/* calculate how many segment lists we can have */
		n_seglists = RTE_MIN(max_segs_per_type / n_segs,
				max_mem_per_type / max_mem_per_list);

		/* limit number of segment lists according to our maximum */
		n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);

		EAL_LOG(DEBUG, "Creating %i segment lists: "
				"n_segs:%i socket_id:%i hugepage_sz:%" PRIu64,
			n_seglists, n_segs, socket_id, pagesz);

		/* create all segment lists */
		for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
			if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
				EAL_LOG(ERR,
					"No more space in memseg lists, please increase RTE_MAX_MEMSEG_LISTS");
				goto out;
			}
			msl = &mcfg->memsegs[msl_idx++];

			if (eal_memseg_list_init(msl, pagesz, n_segs,
					socket_id, cur_seglist, true))
				goto out;

			if (eal_memseg_list_alloc(msl, 0)) {
				EAL_LOG(ERR, "Cannot allocate VA space for memseg list");
				goto out;
			}
		}
	}
	/* we're successful */
	ret = 0;
out:
	free(memtypes);
	return ret;
}

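/*
 * Memseg list walk callback: count, per socket, how many pages of the
 * requested size the existing memseg lists can hold. Used on 32-bit
 * builds to cap preallocated page counts.
 */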
static int __rte_unused
hugepage_count_walk(const struct rte_memseg_list *msl, void *arg)
{
	struct hugepage_info *hpi = arg;

	if (msl->page_sz != hpi->hugepage_sz)
		return 0;

	hpi->num_pages[msl->socket_id] += msl->memseg_arr.len;
	return 0;
}

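/*
 * Allocation validator registered for --socket-limit. The validator
 * framework only invokes callbacks once an allocation would push a socket
 * past its registered limit, so unconditionally returning -1 here
 * enforces a hard cap.
 */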
static int
limits_callback(int socket_id, size_t cur_limit, size_t new_len)
{
	RTE_SET_USED(socket_id);
	RTE_SET_USED(cur_limit);
	RTE_SET_USED(new_len);
	return -1;
}

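/*
 * Preallocate hugepages at startup: decide how many pages of each size to
 * take from each socket, allocate them, and mark them unfreeable so that
 * preallocated memory is never given back to the system. Also registers
 * the socket limit validator when --socket-limit was given.
 */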
int
eal_dynmem_hugepage_init(void)
{
	struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
	uint64_t memory[RTE_MAX_NUMA_NODES];
	int hp_sz_idx, socket_id;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	memset(used_hp, 0, sizeof(used_hp));

	for (hp_sz_idx = 0;
			hp_sz_idx < (int) internal_conf->num_hugepage_sizes;
			hp_sz_idx++) {
#ifndef RTE_ARCH_64
		struct hugepage_info dummy;
		unsigned int i;
#endif
		/* initialize hugepage sizes in used_hp */
		struct hugepage_info *hpi;
		hpi = &internal_conf->hugepage_info[hp_sz_idx];
		used_hp[hp_sz_idx].hugepage_sz = hpi->hugepage_sz;

#ifndef RTE_ARCH_64
		/* for 32-bit, limit number of pages on socket to whatever we've
		 * preallocated, as we cannot allocate more.
		 */
		memset(&dummy, 0, sizeof(dummy));
		dummy.hugepage_sz = hpi->hugepage_sz;
		/* memory_hotplug_lock is held during initialization, so it's
		 * safe to call the thread-unsafe version.
		 */
		if (rte_memseg_list_walk_thread_unsafe(hugepage_count_walk, &dummy) < 0)
			return -1;

		for (i = 0; i < RTE_DIM(dummy.num_pages); i++) {
			hpi->num_pages[i] = RTE_MIN(hpi->num_pages[i],
					dummy.num_pages[i]);
		}
#endif
	}

	/* make a copy of socket_mem, needed for balanced allocation. */
	for (hp_sz_idx = 0; hp_sz_idx < RTE_MAX_NUMA_NODES; hp_sz_idx++)
		memory[hp_sz_idx] = internal_conf->socket_mem[hp_sz_idx];

	/* calculate final number of pages */
	if (eal_dynmem_calc_num_pages_per_socket(memory,
			internal_conf->hugepage_info, used_hp,
			internal_conf->num_hugepage_sizes) < 0)
		return -1;

	for (hp_sz_idx = 0;
			hp_sz_idx < (int)internal_conf->num_hugepage_sizes;
			hp_sz_idx++) {
		for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES;
				socket_id++) {
			struct rte_memseg **pages;
			struct hugepage_info *hpi = &used_hp[hp_sz_idx];
			unsigned int num_pages = hpi->num_pages[socket_id];
			unsigned int num_pages_alloc;

			if (num_pages == 0)
				continue;

			EAL_LOG(DEBUG,
				"Allocating %u pages of size %" PRIu64 "M "
				"on socket %i",
				num_pages, hpi->hugepage_sz >> 20, socket_id);

			/* we may not be able to allocate all pages in one go,
			 * because we break up our memory map into multiple
			 * memseg lists. therefore, try allocating multiple
			 * times and see if we can get the desired number of
			 * pages from multiple allocations.
			 */

			num_pages_alloc = 0;
			do {
				int i, cur_pages, needed;

				needed = num_pages - num_pages_alloc;

				pages = malloc(sizeof(*pages) * needed);
				if (pages == NULL) {
					EAL_LOG(ERR, "Failed to malloc pages");
					return -1;
				}

				/* do not request exact number of pages */
				cur_pages = eal_memalloc_alloc_seg_bulk(pages,
						needed, hpi->hugepage_sz,
						socket_id, false);
				if (cur_pages <= 0) {
					free(pages);
					return -1;
				}

				/* mark preallocated pages as unfreeable */
				for (i = 0; i < cur_pages; i++) {
					struct rte_memseg *ms = pages[i];
					ms->flags |=
						RTE_MEMSEG_FLAG_DO_NOT_FREE;
				}
				free(pages);

				num_pages_alloc += cur_pages;
			} while (num_pages_alloc != num_pages);
		}
	}

	/* if socket limits were specified, set them */
	if (internal_conf->force_socket_limits) {
		unsigned int i;
		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
			uint64_t limit = internal_conf->socket_limit[i];
			if (limit == 0)
				continue;
			if (rte_mem_alloc_validator_register("socket-limit",
					limits_callback, i, limit))
				EAL_LOG(ERR, "Failed to register socket limits validator callback");
		}
	}
	return 0;
}

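/* Total preallocated hugepage memory available on a socket, summed over
 * all detected hugepage sizes.
 */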
__rte_unused /* function is unused on 32-bit builds */
static inline uint64_t
get_socket_mem_size(int socket)
{
	uint64_t size = 0;
	unsigned int i;
	struct internal_config *internal_conf =
		eal_get_internal_configuration();

	for (i = 0; i < internal_conf->num_hugepage_sizes; i++) {
		struct hugepage_info *hpi = &internal_conf->hugepage_info[i];
		size += hpi->hugepage_sz * hpi->num_pages[socket];
	}

	return size;
}

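/*
 * Decide how many pages of each size to use on each socket: either spread
 * the -m/--memory amount across sockets in proportion to their lcore
 * counts, or honor explicit --socket-mem amounts. Fills hp_used and
 * returns the total page count, or -1 if the request cannot be satisfied.
 */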
int
eal_dynmem_calc_num_pages_per_socket(
	uint64_t *memory, struct hugepage_info *hp_info,
	struct hugepage_info *hp_used, unsigned int num_hp_info)
{
	unsigned int socket, j, i = 0;
	unsigned int requested, available;
	int total_num_pages = 0;
	uint64_t remaining_mem, cur_mem;
	const struct internal_config *internal_conf =
		eal_get_internal_configuration();
	uint64_t total_mem = internal_conf->memory;

	if (num_hp_info == 0)
		return -1;

	/* if specific memory amounts per socket weren't requested */
	if (internal_conf->force_sockets == 0) {
		size_t total_size;
#ifdef RTE_ARCH_64
		int cpu_per_socket[RTE_MAX_NUMA_NODES];
		size_t default_size;
		unsigned int lcore_id;

		/* Compute number of cores per socket */
		memset(cpu_per_socket, 0, sizeof(cpu_per_socket));
		RTE_LCORE_FOREACH(lcore_id) {
			cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++;
		}

		/*
		 * Automatically spread requested memory amongst detected
		 * sockets according to the number of cores from the CPU mask
		 * present on each socket.
		 */
		total_size = internal_conf->memory;
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {

			/* Set memory amount per socket */
			default_size = internal_conf->memory *
				cpu_per_socket[socket] / rte_lcore_count();

			/* Limit to maximum available memory on socket */
			default_size = RTE_MIN(
				default_size, get_socket_mem_size(socket));

			/* Update sizes */
			memory[socket] = default_size;
			total_size -= default_size;
		}

		/*
		 * If some memory is remaining, try to allocate it by getting
		 * all available memory from sockets, one after the other.
		 */
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {
			/* take whatever is available */
			default_size = RTE_MIN(
				get_socket_mem_size(socket) - memory[socket],
				total_size);

			/* Update sizes */
			memory[socket] += default_size;
			total_size -= default_size;
		}
#else
		/* in 32-bit mode, allocate all of the memory only on the main
		 * lcore socket
		 */
		total_size = internal_conf->memory;
		for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0;
				socket++) {
			struct rte_config *cfg = rte_eal_get_configuration();
			unsigned int main_lcore_socket;

			main_lcore_socket =
				rte_lcore_to_socket_id(cfg->main_lcore);

			if (main_lcore_socket != socket)
				continue;

			/* Update sizes */
			memory[socket] = total_size;
			break;
		}
#endif
	}

	for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0;
			socket++) {
		/* skip if memory on this socket wasn't requested */
		for (i = 0; i < num_hp_info && memory[socket] != 0; i++) {
			rte_strscpy(hp_used[i].hugedir, hp_info[i].hugedir,
				sizeof(hp_used[i].hugedir));
			hp_used[i].num_pages[socket] = RTE_MIN(
					memory[socket] / hp_info[i].hugepage_sz,
					hp_info[i].num_pages[socket]);

			cur_mem = hp_used[i].num_pages[socket] *
					hp_used[i].hugepage_sz;

			memory[socket] -= cur_mem;
			total_mem -= cur_mem;

			total_num_pages += hp_used[i].num_pages[socket];

			/* check if we have met all memory requests */
			if (memory[socket] == 0)
				break;

			/* If we have used up all pages at this size, move on
			 * to the next size.
			 */
			if (hp_used[i].num_pages[socket] ==
					hp_info[i].num_pages[socket])
				continue;
			/* At this point we know that there are more pages
			 * available that are bigger than the memory we want,
			 * so let's see if we can get enough from other page
			 * sizes.
			 */
			remaining_mem = 0;
			for (j = i + 1; j < num_hp_info; j++)
				remaining_mem += hp_info[j].hugepage_sz *
					hp_info[j].num_pages[socket];

			/* Is there enough other memory?
			 * If not, allocate another page and quit.
			 */
			if (remaining_mem < memory[socket]) {
				cur_mem = RTE_MIN(
					memory[socket], hp_info[i].hugepage_sz);
				memory[socket] -= cur_mem;
				total_mem -= cur_mem;
				hp_used[i].num_pages[socket]++;
				total_num_pages++;
				break; /* we are done with this socket */
			}
		}

		/* if we didn't satisfy all memory requirements per socket */
		if (memory[socket] > 0 &&
				internal_conf->socket_mem[socket] != 0) {
			requested = internal_conf->socket_mem[socket] / 0x100000;
			available = requested - (memory[socket] / 0x100000);
			EAL_LOG(ERR, "Not enough memory available on socket %u! Requested: %uMB, available: %uMB",
				socket, requested, available);
			return -1;
		}
	}

	/* if we didn't satisfy total memory requirements */
	if (total_mem > 0) {
		requested = internal_conf->memory / 0x100000;
		available = requested - (total_mem / 0x100000);
		EAL_LOG(ERR, "Not enough memory available! Requested: %uMB, available: %uMB",
			requested, available);
		return -1;
	}
	return total_num_pages;
}