/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <stdarg.h>
#include <inttypes.h>
#include <string.h>
#include <errno.h>
#include <sys/queue.h>

#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_per_lcore.h>
#include <rte_errno.h>
#include <rte_string_fns.h>
#include <rte_common.h>

#include "malloc_heap.h"
#include "malloc_elem.h"
#include "eal_private.h"

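/*
 * Helpers suffixed _thread_unsafe below do not take the memzone lock
 * themselves; callers are expected to hold mcfg->mlock, as the
 * thread-safe wrappers in this file do.
 */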
static inline const struct rte_memzone *
memzone_lookup_thread_unsafe(const char *name)
{
	const struct rte_mem_config *mcfg;
	const struct rte_memzone *mz;
	unsigned i = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/*
	 * the algorithm is not optimal (linear), but there are few
	 * zones and this function should be called at init only
	 */
	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
		mz = &mcfg->memzone[i];
		if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
			return &mcfg->memzone[i];
	}

	return NULL;
}

static inline struct rte_memzone *
get_next_free_memzone(void)
{
	struct rte_mem_config *mcfg;
	unsigned i = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
		if (mcfg->memzone[i].addr == NULL)
			return &mcfg->memzone[i];
	}

	return NULL;
}

/* Return the length of the greatest free block in the heap of the given
 * socket, minus the space needed for the element header and alignment.
 * If *s is SOCKET_ID_ANY, search all heaps and report back through *s
 * which socket holds that block. */
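/*
 * Illustrative example (hypothetical numbers): with align = 64, if the
 * greatest free element is 1 MiB, the value returned is 1 MiB minus
 * (MALLOC_ELEM_OVERHEAD + 64), i.e. a request size that still fits once
 * the allocator's element header and worst-case alignment padding are
 * accounted for.
 */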
static size_t
find_heap_max_free_elem(int *s, unsigned align)
{
	struct rte_mem_config *mcfg;
	struct rte_malloc_socket_stats stats;
	int i, socket = *s;
	size_t len = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
		if ((socket != SOCKET_ID_ANY) && (socket != i))
			continue;

		malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
		if (stats.greatest_free_size > len) {
			len = stats.greatest_free_size;
			*s = i;
		}
	}

	if (len < MALLOC_ELEM_OVERHEAD + align)
		return 0;

	return len - MALLOC_ELEM_OVERHEAD - align;
}

static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
		int socket_id, unsigned flags, unsigned align, unsigned bound)
{
	struct rte_memzone *mz;
	struct rte_mem_config *mcfg;
	size_t requested_len;
	int socket, i;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* no more room in config */
	if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
		RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}

	if (strlen(name) > sizeof(mz->name) - 1) {
		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s>: name too long\n",
			__func__, name);
		rte_errno = ENAMETOOLONG;
		return NULL;
	}
	/* zone already exists */
	if ((memzone_lookup_thread_unsafe(name)) != NULL) {
		RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
			__func__, name);
		rte_errno = EEXIST;
		return NULL;
	}

	/* if alignment is not a power of two */
	if (align && !rte_is_power_of_2(align)) {
		RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
				align);
		rte_errno = EINVAL;
		return NULL;
	}

	/* alignment less than cache line size is not allowed */
	if (align < RTE_CACHE_LINE_SIZE)
		align = RTE_CACHE_LINE_SIZE;

	/* align length on cache boundary. Check for overflow before doing so */
	if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
		rte_errno = EINVAL; /* requested size too big */
		return NULL;
	}

	/* round length up to the next cache-line multiple */
	len += RTE_CACHE_LINE_MASK;
	len &= ~((size_t) RTE_CACHE_LINE_MASK);

	/* save minimal requested length */
	requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);

	/* check that boundary condition is valid */
	if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) {
		rte_errno = EINVAL;
		return NULL;
	}

	if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) {
		rte_errno = EINVAL;
		return NULL;
	}

	if (!rte_eal_has_hugepages())
		socket_id = SOCKET_ID_ANY;

	/* len == 0 means the caller wants the biggest block available */
	if (len == 0) {
		if (bound != 0)
			requested_len = bound;
		else {
			requested_len = find_heap_max_free_elem(&socket_id, align);
			if (requested_len == 0) {
				rte_errno = ENOMEM;
				return NULL;
			}
		}
	}

	if (socket_id == SOCKET_ID_ANY)
		socket = malloc_get_numa_socket();
	else
		socket = socket_id;

	/* allocate memory on heap */
	void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
			requested_len, flags, align, bound);

	if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
		/* try other heaps */
		for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
			if (socket == i)
				continue;

			mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
					NULL, requested_len, flags, align, bound);
			if (mz_addr != NULL)
				break;
		}
	}

	if (mz_addr == NULL) {
		rte_errno = ENOMEM;
		return NULL;
	}

	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);

	/* fill the zone in config */
	mz = get_next_free_memzone();

	if (mz == NULL) {
		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
				"in config!\n", __func__);
		rte_errno = ENOSPC;
		return NULL;
	}

	mcfg->memzone_cnt++;
	snprintf(mz->name, sizeof(mz->name), "%s", name);
	mz->phys_addr = rte_malloc_virt2phy(mz_addr);
	mz->addr = mz_addr;
	mz->len = (requested_len == 0 ? elem->size : requested_len);
	mz->hugepage_sz = elem->ms->hugepage_sz;
	mz->socket_id = elem->ms->socket_id;
	mz->flags = 0;
	mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;

	return mz;
}

static const struct rte_memzone *
rte_memzone_reserve_thread_safe(const char *name, size_t len,
				int socket_id, unsigned flags, unsigned align,
				unsigned bound)
{
	struct rte_mem_config *mcfg;
	const struct rte_memzone *mz = NULL;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	rte_rwlock_write_lock(&mcfg->mlock);

	mz = memzone_reserve_aligned_thread_unsafe(
		name, len, socket_id, flags, align, bound);

	rte_rwlock_write_unlock(&mcfg->mlock);

	return mz;
}

/*
 * Return a pointer to a correctly filled memzone descriptor (with a
 * specified alignment and boundary). If the allocation cannot be done,
 * return NULL.
 */
const struct rte_memzone *
rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id,
			    unsigned flags, unsigned align, unsigned bound)
{
	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
					       align, bound);
}
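/*
 * Note on "bound": a bound of e.g. 4 MiB asks that the zone not cross a
 * 4 MiB boundary. As checked in memzone_reserve_aligned_thread_unsafe(),
 * bound must be a power of two and at least as large as the requested
 * length; zero means no boundary condition.
 */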

/*
 * Return a pointer to a correctly filled memzone descriptor (with a
 * specified alignment). If the allocation cannot be done, return NULL.
 */
const struct rte_memzone *
rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id,
			    unsigned flags, unsigned align)
{
	return rte_memzone_reserve_thread_safe(name, len, socket_id, flags,
					       align, 0);
}

/*
 * Return a pointer to a correctly filled memzone descriptor. If the
 * allocation cannot be done, return NULL.
 */
const struct rte_memzone *
rte_memzone_reserve(const char *name, size_t len, int socket_id,
		    unsigned flags)
{
	return rte_memzone_reserve_thread_safe(name, len, socket_id,
					       flags, RTE_CACHE_LINE_SIZE, 0);
}
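/*
 * Typical usage (an illustrative sketch, not part of this file): reserve
 * a zone once at initialisation and keep the returned descriptor.
 *
 *	const struct rte_memzone *mz;
 *
 *	mz = rte_memzone_reserve("my_zone", 1024, SOCKET_ID_ANY, 0);
 *	if (mz == NULL)
 *		rte_panic("Cannot reserve memzone: %s\n",
 *				rte_strerror(rte_errno));
 */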

int
rte_memzone_free(const struct rte_memzone *mz)
{
	struct rte_mem_config *mcfg;
	int ret = 0;
	void *addr;
	unsigned idx;

	if (mz == NULL)
		return -EINVAL;

	mcfg = rte_eal_get_configuration()->mem_config;

	rte_rwlock_write_lock(&mcfg->mlock);

	/* recover the zone's index in the config array from its address */
	idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
	idx = idx / sizeof(struct rte_memzone);

	addr = mcfg->memzone[idx].addr;
	if (addr == NULL)
		ret = -EINVAL;
	else if (mcfg->memzone_cnt == 0) {
		rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n",
				__func__);
	} else {
		memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
		mcfg->memzone_cnt--;
	}

	rte_rwlock_write_unlock(&mcfg->mlock);

	rte_free(addr);

	return ret;
}

/*
 * Look up the memzone identified by the given name
 */
const struct rte_memzone *
rte_memzone_lookup(const char *name)
{
	struct rte_mem_config *mcfg;
	const struct rte_memzone *memzone = NULL;

	mcfg = rte_eal_get_configuration()->mem_config;

	rte_rwlock_read_lock(&mcfg->mlock);

	memzone = memzone_lookup_thread_unsafe(name);

	rte_rwlock_read_unlock(&mcfg->mlock);

	return memzone;
}
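/*
 * Since the memzone table lives in the shared rte_mem_config, name lookup
 * is the usual way for a secondary process to find a zone reserved by the
 * primary process.
 */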

/* Dump all reserved memory zones on console */
void
rte_memzone_dump(FILE *f)
{
	struct rte_mem_config *mcfg;
	unsigned i = 0;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	rte_rwlock_read_lock(&mcfg->mlock);
	/* dump all zones; freed zones leave NULL holes, so skip them */
	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
		if (mcfg->memzone[i].addr == NULL)
			continue;
		fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
		       ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
		       mcfg->memzone[i].name,
		       mcfg->memzone[i].phys_addr,
		       mcfg->memzone[i].len,
		       mcfg->memzone[i].addr,
		       mcfg->memzone[i].socket_id,
		       mcfg->memzone[i].flags);
	}
	rte_rwlock_read_unlock(&mcfg->mlock);
}

/*
 * Init the memzone subsystem
 */
int
rte_eal_memzone_init(void)
{
	struct rte_mem_config *mcfg;
	const struct rte_memseg *memseg;

	/* get pointer to global configuration */
	mcfg = rte_eal_get_configuration()->mem_config;

	/* secondary processes don't need to initialise anything */
	if (rte_eal_process_type() == RTE_PROC_SECONDARY)
		return 0;

	memseg = rte_eal_get_physmem_layout();
	if (memseg == NULL) {
		RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
		return -1;
	}

	rte_rwlock_write_lock(&mcfg->mlock);

	/* delete all zones */
	mcfg->memzone_cnt = 0;
	memset(mcfg->memzone, 0, sizeof(mcfg->memzone));

	rte_rwlock_write_unlock(&mcfg->mlock);

	return rte_eal_malloc_heap_init();
}

/* Walk all reserved memory zones */
void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
		      void *arg)
{
	struct rte_mem_config *mcfg;
	unsigned i;

	mcfg = rte_eal_get_configuration()->mem_config;

	rte_rwlock_read_lock(&mcfg->mlock);
	for (i = 0; i < RTE_MAX_MEMZONE; i++) {
		if (mcfg->memzone[i].addr != NULL)
			(*func)(&mcfg->memzone[i], arg);
	}
	rte_rwlock_read_unlock(&mcfg->mlock);
}

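/*
 * Illustrative walk callback (hypothetical names, not part of this file):
 *
 *	static void
 *	count_zone(const struct rte_memzone *mz __rte_unused, void *arg)
 *	{
 *		(*(unsigned *)arg)++;
 *	}
 *
 *	unsigned zone_count = 0;
 *	rte_memzone_walk(count_zone, &zone_count);
 */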