1/*-
2 *   BSD LICENSE
3 *
4 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5 *   All rights reserved.
6 *
7 *   Redistribution and use in source and binary forms, with or without
8 *   modification, are permitted provided that the following conditions
9 *   are met:
10 *
11 *     * Redistributions of source code must retain the above copyright
12 *       notice, this list of conditions and the following disclaimer.
13 *     * Redistributions in binary form must reproduce the above copyright
14 *       notice, this list of conditions and the following disclaimer in
15 *       the documentation and/or other materials provided with the
16 *       distribution.
17 *     * Neither the name of Intel Corporation nor the names of its
18 *       contributors may be used to endorse or promote products derived
19 *       from this software without specific prior written permission.
20 *
21 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <stdint.h>
35#include <unistd.h>
36#include <stdlib.h>
37#include <string.h>
38#include <sys/types.h>
39#include <sys/mman.h>
40#include <errno.h>
41#include <sys/user.h>
42#ifndef PAGE_SIZE
43#define PAGE_SIZE sysconf(_SC_PAGE_SIZE)
44#endif
45#include <linux/binfmts.h>
46#include <xen/xen-compat.h>
47#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
48#include <xs.h>
49#else
50#include <xenstore.h>
51#endif
52#include <linux/virtio_ring.h>
53
54#include <rte_mbuf.h>
55#include <rte_ethdev.h>
56#include <rte_malloc.h>
57#include <rte_memcpy.h>
58#include <rte_string_fns.h>
59#include <rte_vdev.h>
60#include <cmdline_parse.h>
61#include <cmdline_parse_etheraddr.h>
62
63#include "rte_xen_lib.h"
64#include "virtqueue.h"
65#include "rte_eth_xenvirt.h"
66
67#define VQ_DESC_NUM 256
68#define VIRTIO_MBUF_BURST_SZ 64
69
70/* virtio_idx is increased after new device is created.*/
71static int virtio_idx = 0;
72
73static const char *drivername = "xen virtio PMD";
74
75static struct rte_eth_link pmd_link = {
76		.link_speed = ETH_SPEED_NUM_10G,
77		.link_duplex = ETH_LINK_FULL_DUPLEX,
78		.link_status = ETH_LINK_DOWN,
79		.link_autoneg = ETH_LINK_SPEED_FIXED
80};
81
82static void
83eth_xenvirt_free_queues(struct rte_eth_dev *dev);
84
85static uint16_t
86eth_xenvirt_rx(void *q, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
87{
88	struct virtqueue *rxvq = q;
89	struct rte_mbuf *rxm, *new_mbuf;
90	uint16_t nb_used, num;
91	uint32_t len[VIRTIO_MBUF_BURST_SZ];
92	uint32_t i;
93	struct pmd_internals *pi = rxvq->internals;
94
95	nb_used = VIRTQUEUE_NUSED(rxvq);
96
97	rte_smp_rmb();
98	num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts);
99	num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ);
100	if (unlikely(num == 0)) return 0;
101
102	num = virtqueue_dequeue_burst(rxvq, rx_pkts, len, num);
103	PMD_RX_LOG(DEBUG, "used:%d dequeue:%d\n", nb_used, num);
104	for (i = 0; i < num ; i ++) {
105		rxm = rx_pkts[i];
106		PMD_RX_LOG(DEBUG, "packet len:%d\n", len[i]);
107		rxm->next = NULL;
108		rxm->data_off = RTE_PKTMBUF_HEADROOM;
109		rxm->data_len = (uint16_t)(len[i] - sizeof(struct virtio_net_hdr));
110		rxm->nb_segs = 1;
111		rxm->port = pi->port_id;
112		rxm->pkt_len  = (uint32_t)(len[i] - sizeof(struct virtio_net_hdr));
113	}
114	/* allocate new mbuf for the used descriptor */
115	while (likely(!virtqueue_full(rxvq))) {
116		new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool);
117		if (unlikely(new_mbuf == NULL)) {
118			break;
119		}
120		if (unlikely(virtqueue_enqueue_recv_refill(rxvq, new_mbuf))) {
121			rte_pktmbuf_free_seg(new_mbuf);
122			break;
123		}
124	}
125	pi->eth_stats.ipackets += num;
126	return num;
127}
128
129static uint16_t
130eth_xenvirt_tx(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
131{
132	struct virtqueue *txvq = tx_queue;
133	struct rte_mbuf *txm;
134	uint16_t nb_used, nb_tx, num, i;
135	int error;
136	uint32_t len[VIRTIO_MBUF_BURST_SZ];
137	struct rte_mbuf *snd_pkts[VIRTIO_MBUF_BURST_SZ];
138	struct pmd_internals *pi = txvq->internals;
139
140	nb_tx = 0;
141
142	if (unlikely(nb_pkts == 0))
143		return 0;
144
145	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
146	nb_used = VIRTQUEUE_NUSED(txvq);
147
148	rte_smp_rmb();
149
150	num = (uint16_t)(likely(nb_used <= VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
151	num = virtqueue_dequeue_burst(txvq, snd_pkts, len, num);
152
153	for (i = 0; i < num ; i ++) {
154		/* mergable not supported, one segment only */
155		rte_pktmbuf_free_seg(snd_pkts[i]);
156	}
157
158	while (nb_tx < nb_pkts) {
159		if (likely(!virtqueue_full(txvq))) {
160		/* TODO drop tx_pkts if it contains multiple segments */
161			txm = tx_pkts[nb_tx];
162			error = virtqueue_enqueue_xmit(txvq, txm);
163			if (unlikely(error)) {
164				if (error == ENOSPC)
165					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count = 0\n");
166				else if (error == EMSGSIZE)
167					PMD_TX_LOG(ERR, "virtqueue_enqueue Free count < 1\n");
168				else
169					PMD_TX_LOG(ERR, "virtqueue_enqueue error: %d\n", error);
170				break;
171			}
172			nb_tx++;
173		} else {
174			PMD_TX_LOG(ERR, "No free tx descriptors to transmit\n");
175			/* virtqueue_notify not needed in our para-virt solution */
176			break;
177		}
178	}
179	pi->eth_stats.opackets += nb_tx;
180	return nb_tx;
181}
182
183static int
184eth_dev_configure(struct rte_eth_dev *dev __rte_unused)
185{
186	RTE_LOG(ERR, PMD, "%s\n", __func__);
187	return 0;
188}
189
190/*
191 * Create a shared page between guest and host.
192 * Host monitors this page if it is cleared on unmap, and then
193 * do necessary clean up.
194 */
195static void
196gntalloc_vring_flag(int vtidx)
197{
198	char key_str[PATH_MAX];
199	char val_str[PATH_MAX];
200	uint32_t gref_tmp;
201	void *ptr;
202
203	if (grefwatch_from_alloc(&gref_tmp, &ptr)) {
204		RTE_LOG(ERR, PMD, "grefwatch_from_alloc error\n");
205		exit(0);
206	}
207
208	*(uint8_t *)ptr = MAP_FLAG;
209	snprintf(val_str, sizeof(val_str), "%u", gref_tmp);
210	snprintf(key_str, sizeof(key_str),
211		DPDK_XENSTORE_PATH"%d"VRING_FLAG_STR, vtidx);
212	xenstore_write(key_str, val_str);
213}
214
215/*
216 * Notify host this virtio device is started.
217 * Host could start polling this device.
218 */
219static void
220dev_start_notify(int vtidx)
221{
222	char key_str[PATH_MAX];
223	char val_str[PATH_MAX];
224
225	RTE_LOG(INFO, PMD, "%s: virtio %d is started\n", __func__, vtidx);
226	gntalloc_vring_flag(vtidx);
227
228	snprintf(key_str, sizeof(key_str), "%s%s%d",
229		DPDK_XENSTORE_PATH, EVENT_TYPE_START_STR,
230			vtidx);
231	snprintf(val_str, sizeof(val_str), "1");
232	xenstore_write(key_str, val_str);
233}
234
/*
 * Placeholder for notifying the host that this virtio device is stopped.
 * Nothing is sent at present; the index is accepted and ignored.
 */
static void
dev_stop_notify(int vtidx)
{
	(void)(vtidx);
}
244
245
246static int
247update_mac_address(struct ether_addr *mac_addrs, int vtidx)
248{
249	char key_str[PATH_MAX];
250	char val_str[PATH_MAX];
251	int rv;
252
253	if (mac_addrs == NULL) {
254		RTE_LOG(ERR, PMD, "%s: NULL pointer mac specified\n", __func__);
255		return -1;
256	}
257	rv = snprintf(key_str, sizeof(key_str),
258			DPDK_XENSTORE_PATH"%d_ether_addr", vtidx);
259	if (rv == -1)
260		return rv;
261	rv = snprintf(val_str, sizeof(val_str), "%02x:%02x:%02x:%02x:%02x:%02x",
262			mac_addrs->addr_bytes[0],
263			mac_addrs->addr_bytes[1],
264			mac_addrs->addr_bytes[2],
265			mac_addrs->addr_bytes[3],
266			mac_addrs->addr_bytes[4],
267			mac_addrs->addr_bytes[5]);
268	if (rv == -1)
269		return rv;
270	if (xenstore_write(key_str, val_str))
271		return rv;
272	return 0;
273}
274
275
276static int
277eth_dev_start(struct rte_eth_dev *dev)
278{
279	struct virtqueue *rxvq = dev->data->rx_queues[0];
280	struct virtqueue *txvq = dev->data->tx_queues[0];
281	struct rte_mbuf *m;
282	struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
283	int rv;
284
285	dev->data->dev_link.link_status = ETH_LINK_UP;
286	while (!virtqueue_full(rxvq)) {
287		m = rte_mbuf_raw_alloc(rxvq->mpool);
288		if (m == NULL)
289			break;
290		/* Enqueue allocated buffers. */
291		if (virtqueue_enqueue_recv_refill(rxvq, m)) {
292			rte_pktmbuf_free_seg(m);
293			break;
294		}
295	}
296
297	rxvq->internals = pi;
298	txvq->internals = pi;
299
300	rv = update_mac_address(dev->data->mac_addrs, pi->virtio_idx);
301	if (rv)
302		return -1;
303	dev_start_notify(pi->virtio_idx);
304
305	return 0;
306}
307
308static void
309eth_dev_stop(struct rte_eth_dev *dev)
310{
311	struct pmd_internals *pi = (struct pmd_internals *)dev->data->dev_private;
312
313	dev->data->dev_link.link_status = ETH_LINK_DOWN;
314	dev_stop_notify(pi->virtio_idx);
315}
316
/*
 * Device close callback: release every RX and TX queue of the device.
 * No explicit notification is sent to the host from here.
 */
static void
eth_dev_close(struct rte_eth_dev *dev)
{
	eth_xenvirt_free_queues(dev);
}
326
327static void
328eth_dev_info(struct rte_eth_dev *dev,
329		struct rte_eth_dev_info *dev_info)
330{
331	struct pmd_internals *internals = dev->data->dev_private;
332
333	RTE_SET_USED(internals);
334	dev_info->driver_name = drivername;
335	dev_info->max_mac_addrs = 1;
336	dev_info->max_rx_pktlen = (uint32_t)2048;
337	dev_info->max_rx_queues = (uint16_t)1;
338	dev_info->max_tx_queues = (uint16_t)1;
339	dev_info->min_rx_bufsize = 0;
340	dev_info->pci_dev = NULL;
341}
342
343static void
344eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
345{
346	struct pmd_internals *internals = dev->data->dev_private;
347	if(stats)
348		rte_memcpy(stats, &internals->eth_stats, sizeof(*stats));
349}
350
351static void
352eth_stats_reset(struct rte_eth_dev *dev)
353{
354	struct pmd_internals *internals = dev->data->dev_private;
355	/* Reset software totals */
356	memset(&internals->eth_stats, 0, sizeof(internals->eth_stats));
357}
358
/*
 * Queue release callback, shared by RX and TX queues: frees the virtqueue
 * structure itself.
 * NOTE(review): the ring memory obtained through gntalloc_vring_create()
 * is not released here -- confirm whether that is intentional.
 */
static void
eth_queue_release(void *q)
{
	rte_free(q);
}
364
365static int
366eth_link_update(struct rte_eth_dev *dev __rte_unused,
367		int wait_to_complete __rte_unused)
368{
369	return 0;
370}
371
372/*
373 * Create shared vring between guest and host.
374 * Memory is allocated through grant alloc driver, so it is not physical continuous.
375 */
376static void *
377gntalloc_vring_create(int queue_type, uint32_t size, int vtidx)
378{
379	char key_str[PATH_MAX] = {0};
380	char val_str[PATH_MAX] = {0};
381	void *va = NULL;
382	int pg_size;
383	uint32_t pg_num;
384	uint32_t *gref_arr = NULL;
385	phys_addr_t *pa_arr = NULL;
386	uint64_t start_index;
387	int rv;
388
389	pg_size = getpagesize();
390	size    = RTE_ALIGN_CEIL(size, pg_size);
391	pg_num  = size / pg_size;
392
393	gref_arr = calloc(pg_num, sizeof(gref_arr[0]));
394	pa_arr  = calloc(pg_num, sizeof(pa_arr[0]));
395
396	if (gref_arr == NULL || pa_arr == NULL) {
397		RTE_LOG(ERR, PMD, "%s: calloc failed\n", __func__);
398		goto out;
399	}
400
401	va  = gntalloc(size, gref_arr, &start_index);
402	if (va == NULL) {
403		RTE_LOG(ERR, PMD, "%s: gntalloc failed\n", __func__);
404		goto out;
405	}
406
407	if (get_phys_map(va, pa_arr, pg_num, pg_size))
408		goto out;
409
410	/* write in xenstore gref and pfn for each page of vring */
411	if (grant_node_create(pg_num, gref_arr, pa_arr, val_str, sizeof(val_str))) {
412		gntfree(va, size, start_index);
413		va = NULL;
414		goto out;
415	}
416
417	if (queue_type == VTNET_RQ)
418		rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"RXVRING_XENSTORE_STR, vtidx);
419	else
420		rv = snprintf(key_str, sizeof(key_str), DPDK_XENSTORE_PATH"%d"TXVRING_XENSTORE_STR, vtidx);
421	if (rv == -1 || xenstore_write(key_str, val_str) == -1) {
422		gntfree(va, size, start_index);
423		va = NULL;
424	}
425out:
426	free(pa_arr);
427	free(gref_arr);
428
429	return va;
430}
431
432
433
434static struct virtqueue *
435virtio_queue_setup(struct rte_eth_dev *dev, int queue_type)
436{
437	struct virtqueue *vq = NULL;
438	uint16_t vq_size = VQ_DESC_NUM;
439	int i = 0;
440	char vq_name[VIRTQUEUE_MAX_NAME_SZ];
441	size_t size;
442	struct vring *vr;
443
444	/* Allocate memory for virtqueue. */
445	if (queue_type == VTNET_RQ) {
446		snprintf(vq_name, sizeof(vq_name), "port%d_rvq",
447				dev->data->port_id);
448		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
449			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
450		if (vq == NULL) {
451			RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__);
452			return NULL;
453		}
454		memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
455	} else if(queue_type == VTNET_TQ) {
456		snprintf(vq_name, sizeof(vq_name), "port%d_tvq",
457			dev->data->port_id);
458		vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) +
459			vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE);
460		if (vq == NULL) {
461			RTE_LOG(ERR, PMD, "%s: unabled to allocate virtqueue\n", __func__);
462			return NULL;
463		}
464		memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
465	}
466
467	memcpy(vq->vq_name, vq_name, sizeof(vq->vq_name));
468
469	vq->vq_alignment = VIRTIO_PCI_VRING_ALIGN;
470	vq->vq_nentries = vq_size;
471	vq->vq_free_cnt = vq_size;
472	/* Calcuate vring size according to virtio spec */
473	size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
474	vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
475	/* Allocate memory for virtio vring through gntalloc driver*/
476	vq->vq_ring_virt_mem = gntalloc_vring_create(queue_type, vq->vq_ring_size,
477		((struct pmd_internals *)dev->data->dev_private)->virtio_idx);
478	memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size);
479	vr = &vq->vq_ring;
480	vring_init(vr, vq_size, vq->vq_ring_virt_mem, vq->vq_alignment);
481	/*
482	 * Locally maintained last consumed index, this idex trails
483	 * vq_ring.used->idx.
484	 */
485	vq->vq_used_cons_idx = 0;
486	vq->vq_desc_head_idx = 0;
487	vq->vq_free_cnt = vq->vq_nentries;
488	memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries);
489
490	/* Chain all the descriptors in the ring with an END */
491	for (i = 0; i < vq_size - 1; i++)
492		vr->desc[i].next = (uint16_t)(i + 1);
493	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
494
495	return vq;
496}
497
498static int
499eth_rx_queue_setup(struct rte_eth_dev *dev,uint16_t rx_queue_id,
500				uint16_t nb_rx_desc __rte_unused,
501				unsigned int socket_id __rte_unused,
502				const struct rte_eth_rxconf *rx_conf __rte_unused,
503				struct rte_mempool *mb_pool)
504{
505	struct virtqueue *vq;
506	vq = dev->data->rx_queues[rx_queue_id] = virtio_queue_setup(dev, VTNET_RQ);
507	vq->mpool = mb_pool;
508	return 0;
509}
510
511static int
512eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
513				uint16_t nb_tx_desc __rte_unused,
514				unsigned int socket_id __rte_unused,
515				const struct rte_eth_txconf *tx_conf __rte_unused)
516{
517	dev->data->tx_queues[tx_queue_id] = virtio_queue_setup(dev, VTNET_TQ);
518	return 0;
519}
520
521static void
522eth_xenvirt_free_queues(struct rte_eth_dev *dev)
523{
524	int i;
525
526	for (i = 0; i < dev->data->nb_rx_queues; i++) {
527		eth_queue_release(dev->data->rx_queues[i]);
528		dev->data->rx_queues[i] = NULL;
529	}
530	dev->data->nb_rx_queues = 0;
531
532	for (i = 0; i < dev->data->nb_tx_queues; i++) {
533		eth_queue_release(dev->data->tx_queues[i]);
534		dev->data->tx_queues[i] = NULL;
535	}
536	dev->data->nb_tx_queues = 0;
537}
538
539static const struct eth_dev_ops ops = {
540	.dev_start = eth_dev_start,
541	.dev_stop = eth_dev_stop,
542	.dev_close = eth_dev_close,
543	.dev_configure = eth_dev_configure,
544	.dev_infos_get = eth_dev_info,
545	.rx_queue_setup = eth_rx_queue_setup,
546	.tx_queue_setup = eth_tx_queue_setup,
547	.rx_queue_release = eth_queue_release,
548	.tx_queue_release = eth_queue_release,
549	.link_update = eth_link_update,
550	.stats_get = eth_stats_get,
551	.stats_reset = eth_stats_reset,
552};
553
554
555static int
556rte_eth_xenvirt_parse_args(struct xenvirt_dict *dict,
557			const char *name, const char *params)
558{
559	int i;
560	char *pairs[RTE_ETH_XENVIRT_MAX_ARGS];
561	int num_of_pairs;
562	char *pair[2];
563	char *args;
564	int ret = -1;
565
566	if (params == NULL)
567		return 0;
568
569	args = rte_zmalloc(NULL, strlen(params) + 1, RTE_CACHE_LINE_SIZE);
570	if (args == NULL) {
571		RTE_LOG(ERR, PMD, "Couldn't parse %s device \n", name);
572		return -1;
573	}
574	rte_memcpy(args, params, strlen(params));
575
576	num_of_pairs = rte_strsplit(args, strnlen(args, MAX_ARG_STRLEN),
577					pairs,
578					RTE_ETH_XENVIRT_MAX_ARGS ,
579					RTE_ETH_XENVIRT_PAIRS_DELIM);
580
581	for (i = 0; i < num_of_pairs; i++) {
582		pair[0] = NULL;
583		pair[1] = NULL;
584		rte_strsplit(pairs[i], strnlen(pairs[i], MAX_ARG_STRLEN),
585					pair, 2,
586					RTE_ETH_XENVIRT_KEY_VALUE_DELIM);
587
588		if (pair[0] == NULL || pair[1] == NULL || pair[0][0] == 0
589			|| pair[1][0] == 0) {
590			RTE_LOG(ERR, PMD,
591				"Couldn't parse %s device,"
592				"wrong key or value \n", name);
593			goto err;
594		}
595
596		if (!strncmp(pair[0], RTE_ETH_XENVIRT_MAC_PARAM,
597				sizeof(RTE_ETH_XENVIRT_MAC_PARAM))) {
598			if (cmdline_parse_etheraddr(NULL,
599						    pair[1],
600						    &dict->addr,
601						    sizeof(dict->addr)) < 0) {
602				RTE_LOG(ERR, PMD,
603					"Invalid %s device ether address\n",
604					name);
605				goto err;
606			}
607
608			dict->addr_valid = 1;
609		}
610	}
611
612	ret = 0;
613err:
614	rte_free(args);
615	return ret;
616}
617
/*
 * Requested action when a device is instantiated. The create routine
 * currently accepts the value without acting on it.
 */
enum dev_action {
	DEV_CREATE = 0,	/* value passed by the probe path */
	DEV_ATTACH
};
622
623
624static int
625eth_dev_xenvirt_create(const char *name, const char *params,
626		const unsigned numa_node,
627                enum dev_action action)
628{
629	struct rte_eth_dev_data *data = NULL;
630	struct pmd_internals *internals = NULL;
631	struct rte_eth_dev *eth_dev = NULL;
632	struct xenvirt_dict dict;
633
634	memset(&dict, 0, sizeof(struct xenvirt_dict));
635
636	RTE_LOG(INFO, PMD, "Creating virtio rings backed ethdev on numa socket %u\n",
637			numa_node);
638	RTE_SET_USED(action);
639
640	if (rte_eth_xenvirt_parse_args(&dict, name, params) < 0) {
641		RTE_LOG(ERR, PMD, "%s: Failed to parse ethdev parameters\n", __func__);
642		return -1;
643	}
644
645	/* now do all data allocation - for eth_dev structure, dummy pci driver
646	 * and internal (private) data
647	 */
648	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
649	if (data == NULL)
650		goto err;
651
652	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
653	if (internals == NULL)
654		goto err;
655
656	/* reserve an ethdev entry */
657	eth_dev = rte_eth_dev_allocate(name);
658	if (eth_dev == NULL)
659		goto err;
660
661	data->dev_private = internals;
662	data->port_id = eth_dev->data->port_id;
663	data->nb_rx_queues = (uint16_t)1;
664	data->nb_tx_queues = (uint16_t)1;
665	data->dev_link = pmd_link;
666	data->mac_addrs = rte_zmalloc("xen_virtio", ETHER_ADDR_LEN, 0);
667
668	if(dict.addr_valid)
669		memcpy(&data->mac_addrs->addr_bytes, &dict.addr, sizeof(struct ether_addr));
670	else
671		eth_random_addr(&data->mac_addrs->addr_bytes[0]);
672
673	eth_dev->data = data;
674	eth_dev->dev_ops = &ops;
675
676	eth_dev->data->dev_flags = RTE_PCI_DRV_DETACHABLE;
677	eth_dev->data->kdrv = RTE_KDRV_NONE;
678	eth_dev->data->drv_name = drivername;
679	eth_dev->driver = NULL;
680	eth_dev->data->numa_node = numa_node;
681
682	eth_dev->rx_pkt_burst = eth_xenvirt_rx;
683	eth_dev->tx_pkt_burst = eth_xenvirt_tx;
684
685	internals->virtio_idx = virtio_idx++;
686	internals->port_id = eth_dev->data->port_id;
687
688	return 0;
689
690err:
691	rte_free(data);
692	rte_free(internals);
693
694	return -1;
695}
696
697
698static int
699eth_dev_xenvirt_free(const char *name, const unsigned numa_node)
700{
701	struct rte_eth_dev *eth_dev = NULL;
702
703	RTE_LOG(DEBUG, PMD,
704		"Free virtio rings backed ethdev on numa socket %u\n",
705		numa_node);
706
707	/* find an ethdev entry */
708	eth_dev = rte_eth_dev_allocated(name);
709	if (eth_dev == NULL)
710		return -1;
711
712	if (eth_dev->data->dev_started == 1) {
713		eth_dev_stop(eth_dev);
714		eth_dev_close(eth_dev);
715	}
716
717	eth_dev->rx_pkt_burst = NULL;
718	eth_dev->tx_pkt_burst = NULL;
719	eth_dev->dev_ops = NULL;
720
721	rte_free(eth_dev->data);
722	rte_free(eth_dev->data->dev_private);
723	rte_free(eth_dev->data->mac_addrs);
724
725	virtio_idx--;
726
727	return 0;
728}
729
730/*TODO: Support multiple process model */
731static int
732rte_pmd_xenvirt_probe(const char *name, const char *params)
733{
734	if (virtio_idx == 0) {
735		if (xenstore_init() != 0) {
736			RTE_LOG(ERR, PMD, "%s: xenstore init failed\n", __func__);
737			return -1;
738		}
739		if (gntalloc_open() != 0) {
740			RTE_LOG(ERR, PMD, "%s: grant init failed\n", __func__);
741			return -1;
742		}
743	}
744	eth_dev_xenvirt_create(name, params, rte_socket_id(), DEV_CREATE);
745	return 0;
746}
747
748static int
749rte_pmd_xenvirt_remove(const char *name)
750{
751	eth_dev_xenvirt_free(name, rte_socket_id());
752
753	if (virtio_idx == 0) {
754		if (xenstore_uninit() != 0)
755			RTE_LOG(ERR, PMD, "%s: xenstore uninit failed\n", __func__);
756
757		gntalloc_close();
758	}
759	return 0;
760}
761
762static struct rte_vdev_driver pmd_xenvirt_drv = {
763	.probe = rte_pmd_xenvirt_probe,
764	.remove = rte_pmd_xenvirt_remove,
765};
766
767RTE_PMD_REGISTER_VDEV(net_xenvirt, pmd_xenvirt_drv);
768RTE_PMD_REGISTER_ALIAS(net_xenvirt, eth_xenvirt);
769RTE_PMD_REGISTER_PARAM_STRING(net_xenvirt,
770	"mac=<mac addr>");
771