kni_vhost.c revision 3d9b7210
/*-
 * GPL LICENSE SUMMARY
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of version 2 of the GNU General Public License as
 *   published by the Free Software Foundation.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *   General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *   The full GNU General Public License is included in this distribution
 *   in the file called LICENSE.GPL.
 *
 *   Contact Information:
 *   Intel Corporation
 */

#include <linux/module.h>
#include <linux/net.h>
#include <net/sock.h>
#include <linux/virtio_net.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/nsproxy.h>
#include <linux/sched.h>
#include <linux/if_tun.h>
#include <linux/version.h>
#include <linux/file.h>

#include "compat.h"
#include "kni_dev.h"
#include "kni_fifo.h"

#define RX_BURST_SZ 4

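/*
 * Local helper that mirrors sock_map_fd(): reserve an unused fd, wrap the
 * socket in a struct file and install it.  Only built when the compat layer
 * defines HAVE_STATIC_SOCK_MAP_FD, i.e. presumably when the running kernel
 * no longer exposes sock_map_fd() to modules.
 */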
#ifdef HAVE_STATIC_SOCK_MAP_FD
static int kni_sock_map_fd(struct socket *sock)
{
	struct file *file;
	int fd = get_unused_fd_flags(0);

	if (fd < 0)
		return fd;

	file = sock_alloc_file(sock, 0, NULL);
	if (IS_ERR(file)) {
		put_unused_fd(fd);
		return PTR_ERR(file);
	}
	fd_install(fd, file);
	return fd;
}
#endif

static struct proto kni_raw_proto = {
	.name = "kni_vhost",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct kni_vhost_queue),
};

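/*
 * Transmit path: copy one packet from the vhost msghdr into an mbuf taken
 * from alloc_q and hand it to the DPDK application through tx_q.  Frames
 * shorter than ETH_ZLEN are zero-padded.  If no mbuf or no tx_q slot is
 * available the packet is dropped and only tx_dropped is updated.
 */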
static inline int
kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
		 uint32_t offset, uint32_t len)
{
	struct rte_kni_mbuf *pkt_kva = NULL;
	struct rte_kni_mbuf *pkt_va = NULL;
	int ret;

	pr_debug("tx offset=%d, len=%d, iovlen=%d\n",
#ifdef HAVE_IOV_ITER_MSGHDR
		   offset, len, (int)m->msg_iter.iov->iov_len);
#else
		   offset, len, (int)m->msg_iov->iov_len);
#endif

	/*
	 * Check that there is at least one free entry in tx_q and
	 * at least one entry in alloc_q.
	 */
	if (kni_fifo_free_count(kni->tx_q) == 0 ||
	    kni_fifo_count(kni->alloc_q) == 0) {
		/*
		 * If not, drop the packet and bail out.
		 */
		goto drop;
	}

	/* dequeue an mbuf from alloc_q */
	ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
	if (likely(ret == 1)) {
		void *data_kva;

		pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
		data_kva = pkt_kva->buf_addr + pkt_kva->data_off
			- kni->mbuf_va + kni->mbuf_kva;

#ifdef HAVE_IOV_ITER_MSGHDR
		copy_from_iter(data_kva, len, &m->msg_iter);
#else
		memcpy_fromiovecend(data_kva, m->msg_iov, offset, len);
#endif

		if (unlikely(len < ETH_ZLEN)) {
			memset(data_kva + len, 0, ETH_ZLEN - len);
			len = ETH_ZLEN;
		}
		pkt_kva->pkt_len = len;
		pkt_kva->data_len = len;

		/* enqueue mbuf into tx_q */
		ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
		if (unlikely(ret != 1)) {
			/* Failing should not happen */
			pr_err("Fail to enqueue mbuf into tx_q\n");
			goto drop;
		}
	} else {
		/* Failing should not happen */
		pr_err("Fail to dequeue mbuf from alloc_q\n");
		goto drop;
	}

	/* update statistics */
	kni->stats.tx_bytes += len;
	kni->stats.tx_packets++;

	return 0;

drop:
	/* update statistics */
	kni->stats.tx_dropped++;

	return 0;
}

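/*
 * Receive path: take one skb queued by kni_chk_vhost_rx(), recycle the skb
 * itself into the per-queue cache fifo, copy the mbuf payload it referenced
 * into the vhost msghdr and return the mbuf to free_q.  Returns the packet
 * length, or 0 when nothing was delivered.
 */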
static inline int
kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
		 uint32_t offset, uint32_t len)
{
	uint32_t pkt_len;
	struct rte_kni_mbuf *kva;
	struct rte_kni_mbuf *va;
	void *data_kva;
	struct sk_buff *skb;
	struct kni_vhost_queue *q = kni->vhost_queue;

	if (unlikely(q == NULL))
		return 0;

	/* ensure at least one entry in free_q */
	if (unlikely(kni_fifo_free_count(kni->free_q) == 0))
		return 0;

	skb = skb_dequeue(&q->sk.sk_receive_queue);
	if (unlikely(skb == NULL))
		return 0;

	kva = (struct rte_kni_mbuf *)skb->data;

	/* free skb to cache */
	skb->data = NULL;
	if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1))
		/* Failing should not happen */
		pr_err("Fail to enqueue entries into rx cache fifo\n");

	pkt_len = kva->data_len;
	if (unlikely(pkt_len > len))
		goto drop;

	pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
#ifdef HAVE_IOV_ITER_MSGHDR
		   offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
#else
		   offset, len, pkt_len, (int)m->msg_iov->iov_len);
#endif

	data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva;
#ifdef HAVE_IOV_ITER_MSGHDR
	/* copy_to_iter() returns the number of bytes copied */
	if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter) !=
		     pkt_len))
#else
	if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len)))
#endif
		goto drop;

	/* Update statistics */
	kni->stats.rx_bytes += pkt_len;
	kni->stats.rx_packets++;

	/* enqueue mbufs into free_q */
	va = (void *)kva - kni->mbuf_kva + kni->mbuf_va;
	if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1))
		/* Failing should not happen */
		pr_err("Fail to enqueue entries into free_q\n");

	pr_debug("receive done %d\n", pkt_len);

	return pkt_len;

drop:
	/* Update drop statistics */
	kni->stats.rx_dropped++;

	return 0;
}

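/*
 * poll() backend of the dummy socket: report POLLIN while rx_q holds mbufs
 * from the DPDK application and POLLOUT while the socket is writeable.
 */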
static uint32_t
kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct kni_vhost_queue *q =
		container_of(sock->sk, struct kni_vhost_queue, sk);
	struct kni_dev *kni;
	uint32_t mask = 0;

	if (unlikely(q == NULL || q->kni == NULL))
		return POLLERR;

	kni = q->kni;
#ifdef HAVE_SOCKET_WQ
	pr_debug("start kni_poll on group %d, wq 0x%16llx\n",
		  kni->group_id, (uint64_t)sock->wq);
	poll_wait(file, &sock->wq->wait, wait);
#else
	pr_debug("start kni_poll on group %d, wait at 0x%16llx\n",
		  kni->group_id, (uint64_t)&sock->wait);
	poll_wait(file, &sock->wait, wait);
#endif

	if (kni_fifo_count(kni->rx_q) > 0)
		mask |= POLLIN | POLLRDNORM;

	if (sock_writeable(&q->sk) ||
#ifdef SOCKWQ_ASYNC_NOSPACE
		(!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
			sock_writeable(&q->sk)))
#else
		(!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
			sock_writeable(&q->sk)))
#endif
		mask |= POLLOUT | POLLWRNORM;

	return mask;
}

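/*
 * Point a cached skb at the kernel virtual address of one mbuf and append
 * it to the queue's sk_receive_queue; the burst variant below does the same
 * for RX_BURST_SZ packets at a time.
 */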
static inline void
kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
		  struct sk_buff *skb, struct rte_kni_mbuf *va)
{
	struct rte_kni_mbuf *kva;

	kva = (void *)va - kni->mbuf_va + kni->mbuf_kva;
	skb->data = (unsigned char *)kva;
	skb->len = kva->data_len;
	skb_queue_tail(&q->sk.sk_receive_queue, skb);
}

static inline void
kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
	  struct sk_buff **skb, struct rte_kni_mbuf **va)
{
	int i;

	for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
		kni_vhost_enqueue(kni, q, *skb, *va);
}

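/*
 * Polled by the KNI kernel thread while the backend is active: move packets
 * that the DPDK application placed in rx_q onto the dummy socket's receive
 * queue, using skbs recycled through the cache fifo, and wake up sleeping
 * pollers when data is available.  A pending BE_STOP request is acknowledged
 * by setting BE_FINISH.
 */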
int
kni_chk_vhost_rx(struct kni_dev *kni)
{
	struct kni_vhost_queue *q = kni->vhost_queue;
	uint32_t nb_in, nb_mbuf, nb_skb;
	const uint32_t BURST_MASK = RX_BURST_SZ - 1;
	uint32_t nb_burst, nb_backlog, i;
	struct sk_buff *skb[RX_BURST_SZ];
	struct rte_kni_mbuf *va[RX_BURST_SZ];

	if (unlikely(BE_STOP & kni->vq_status)) {
		kni->vq_status |= BE_FINISH;
		return 0;
	}

	if (unlikely(q == NULL))
		return 0;

	nb_skb = kni_fifo_count(q->fifo);
	nb_mbuf = kni_fifo_count(kni->rx_q);

	nb_in = min(nb_mbuf, nb_skb);
	nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ);
	nb_burst   = (nb_in & ~BURST_MASK);
	nb_backlog = (nb_in & BURST_MASK);

	/* enqueue to the skb queue in bursts of RX_BURST_SZ */
	if (nb_burst != 0) {
		if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ)
				!= RX_BURST_SZ))
			goto except;

		if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ)
				!= RX_BURST_SZ))
			goto except;

		kni_vhost_enqueue_burst(kni, q, skb, va);
	}

	/* handle the leftover packets one by one */
	for (i = 0; i < nb_backlog; ++i) {
		if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1))
			goto except;

		if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1))
			goto except;

		kni_vhost_enqueue(kni, q, *skb, *va);
	}

	/* on-demand wake up */
	if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) ||
	    ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
		wake_up_interruptible_poll(sk_sleep(&q->sk),
				   POLLIN | POLLRDNORM | POLLRDBAND);
		pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
			   nb_mbuf, nb_skb, nb_in);
	}

	return 0;

except:
	/* Failing should not happen */
	pr_err("Fail to enqueue fifo, it shouldn't happen\n");
	BUG_ON(1);

	return 0;
}

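/*
 * sendmsg() backend: strip the optional virtio_net header and pass the
 * remaining payload to kni_vhost_net_tx().  The kiocb argument only exists
 * on kernels whose sendmsg prototype still takes one (HAVE_KIOCB_MSG_PARAM).
 */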
static int
#ifdef HAVE_KIOCB_MSG_PARAM
kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
	   struct msghdr *m, size_t total_len)
#else
kni_sock_sndmsg(struct socket *sock,
	   struct msghdr *m, size_t total_len)
#endif /* HAVE_KIOCB_MSG_PARAM */
{
	struct kni_vhost_queue *q =
		container_of(sock->sk, struct kni_vhost_queue, sk);
	int vnet_hdr_len = 0;
	unsigned long len = total_len;

	if (unlikely(q == NULL || q->kni == NULL))
		return 0;

	pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
#ifdef HAVE_IOV_ITER_MSGHDR
		   len, q->flags, (int)m->msg_iter.iov->iov_len);
#else
		   len, q->flags, (int)m->msg_iovlen);
#endif

#ifdef RTE_KNI_VHOST_VNET_HDR_EN
	if (likely(q->flags & IFF_VNET_HDR)) {
		vnet_hdr_len = q->vnet_hdr_sz;
		if (unlikely(len < vnet_hdr_len))
			return -EINVAL;
		len -= vnet_hdr_len;
	}
#endif

	if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz))
		return -EINVAL;

	return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len);
}

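/*
 * recvmsg() backend: fetch one packet through kni_vhost_net_rx() and, when
 * virtio_net headers are enabled, prepend an all-zero header (no GSO or
 * checksum offloads are ever advertised).
 */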
static int
#ifdef HAVE_KIOCB_MSG_PARAM
kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
	   struct msghdr *m, size_t len, int flags)
#else
kni_sock_rcvmsg(struct socket *sock,
	   struct msghdr *m, size_t len, int flags)
#endif /* HAVE_KIOCB_MSG_PARAM */
{
	int vnet_hdr_len = 0;
	int pkt_len = 0;
	struct kni_vhost_queue *q =
		container_of(sock->sk, struct kni_vhost_queue, sk);
	static struct virtio_net_hdr
		__attribute__ ((unused)) vnet_hdr = {
		.flags = 0,
		.gso_type = VIRTIO_NET_HDR_GSO_NONE
	};

	if (unlikely(q == NULL || q->kni == NULL))
		return 0;

#ifdef RTE_KNI_VHOST_VNET_HDR_EN
	if (likely(q->flags & IFF_VNET_HDR)) {
		vnet_hdr_len = q->vnet_hdr_sz;
		/* len is size_t (unsigned); check before subtracting */
		if (unlikely(len < vnet_hdr_len))
			return -EINVAL;
		len -= vnet_hdr_len;
	}
#endif

	pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len);
	if (unlikely(pkt_len == 0))
		return 0;

#ifdef RTE_KNI_VHOST_VNET_HDR_EN
	/* no need to copy hdr when no pkt received */
#ifdef HAVE_IOV_ITER_MSGHDR
	/* copy_to_iter() returns the number of bytes copied */
	if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len,
		&m->msg_iter) != vnet_hdr_len))
#else
	if (unlikely(memcpy_toiovecend(m->msg_iov,
		(void *)&vnet_hdr, 0, vnet_hdr_len)))
#endif /* HAVE_IOV_ITER_MSGHDR */
		return -EFAULT;
#endif /* RTE_KNI_VHOST_VNET_HDR_EN */
	pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
		   (unsigned long)len, q->flags, pkt_len);

	return pkt_len + vnet_hdr_len;
}

/* dummy tap-like ioctl */
static int
kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct ifreq __user *ifr = argp;
	uint32_t __user *up = argp;
	struct kni_vhost_queue *q =
		container_of(sock->sk, struct kni_vhost_queue, sk);
	struct kni_dev *kni;
	uint32_t u;
	int __user *sp = argp;
	int s;
	int ret;

	pr_debug("tap ioctl cmd 0x%08x\n", cmd);

	switch (cmd) {
	case TUNSETIFF:
		pr_debug("TUNSETIFF\n");
		/* ignore the name, just look at flags */
		if (get_user(u, &ifr->ifr_flags))
			return -EFAULT;

		ret = 0;
		if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
			ret = -EINVAL;
		else
			q->flags = u;

		return ret;

	case TUNGETIFF:
		pr_debug("TUNGETIFF\n");
		rcu_read_lock_bh();
		kni = rcu_dereference_bh(q->kni);
		if (kni)
			dev_hold(kni->net_dev);
		rcu_read_unlock_bh();

		if (!kni)
			return -ENOLINK;

		ret = 0;
		if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ)
				|| put_user(q->flags, &ifr->ifr_flags))
			ret = -EFAULT;
		dev_put(kni->net_dev);
		return ret;

	case TUNGETFEATURES:
		pr_debug("TUNGETFEATURES\n");
		u = IFF_TAP | IFF_NO_PI;
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
		u |= IFF_VNET_HDR;
#endif
		if (put_user(u, up))
			return -EFAULT;
		return 0;

	case TUNSETSNDBUF:
		pr_debug("TUNSETSNDBUF\n");
		if (get_user(u, up))
			return -EFAULT;

		q->sk.sk_sndbuf = u;
		return 0;

	case TUNGETVNETHDRSZ:
		s = q->vnet_hdr_sz;
		if (put_user(s, sp))
			return -EFAULT;
		pr_debug("TUNGETVNETHDRSZ %d\n", s);
		return 0;

	case TUNSETVNETHDRSZ:
		if (get_user(s, sp))
			return -EFAULT;
		if (s < (int)sizeof(struct virtio_net_hdr))
			return -EINVAL;

		pr_debug("TUNSETVNETHDRSZ %d\n", s);
		q->vnet_hdr_sz = s;
		return 0;

	case TUNSETOFFLOAD:
		pr_debug("TUNSETOFFLOAD %lx\n", arg);
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
		/* no offload is supported yet */
		if (!(q->flags & IFF_VNET_HDR))
			return -EINVAL;

		return 0;
#else
		return -EINVAL;
#endif

	default:
		pr_debug("NOT SUPPORT\n");
		return -EINVAL;
	}
}

static int
kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd,
		     unsigned long arg)
{
	/* 32-bit apps on a 64-bit OS: to be supported later */
	pr_debug("Not implemented.\n");

	return -EINVAL;
}

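/*
 * Sleep in 1 ms steps for as long as vq_status is exactly BE_STOP | BE_FINISH;
 * apparently meant to give the KNI thread time to notice the stop request
 * before the queue is torn down in kni_sock_release().
 */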
#define KNI_VHOST_WAIT_WQ_SAFE()                        \
do {							\
	while ((BE_FINISH | BE_STOP) == kni->vq_status) \
		msleep(1);				\
} while (0)

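/*
 * release() backend: ask the KNI thread to stop using the queue, detach it
 * from the kni device and drop the reference on the embedded struct sock,
 * which frees the skb cache and fifo via kni_sk_destruct().
 */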
static int
kni_sock_release(struct socket *sock)
{
	struct kni_vhost_queue *q =
		container_of(sock->sk, struct kni_vhost_queue, sk);
	struct kni_dev *kni;

	if (q == NULL)
		return 0;

	kni = q->kni;
	if (kni != NULL) {
		kni->vq_status = BE_STOP;
		KNI_VHOST_WAIT_WQ_SAFE();
		kni->vhost_queue = NULL;
		q->kni = NULL;
	}

	if (q->sockfd != -1)
		q->sockfd = -1;

	sk_set_socket(&q->sk, NULL);
	sock->sk = NULL;

	sock_put(&q->sk);

	pr_debug("dummy sock release done\n");

	return 0;
}

int
kni_sock_getname(struct socket *sock, struct sockaddr *addr,
		int *sockaddr_len, int peer)
{
	pr_debug("dummy sock getname\n");
	((struct sockaddr_ll *)addr)->sll_family = AF_PACKET;
	return 0;
}

static const struct proto_ops kni_socket_ops = {
	.getname = kni_sock_getname,
	.sendmsg = kni_sock_sndmsg,
	.recvmsg = kni_sock_rcvmsg,
	.release = kni_sock_release,
	.poll    = kni_sock_poll,
	.ioctl   = kni_sock_ioctl,
	.compat_ioctl = kni_sock_compat_ioctl,
};

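/*
 * sk_write_space callback: once the socket becomes writeable again, clear
 * the async-nospace flag and wake up anyone sleeping in poll() for POLLOUT.
 */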
static void
kni_sk_write_space(struct sock *sk)
{
	wait_queue_head_t *wqueue;

	if (!sock_writeable(sk) ||
#ifdef SOCKWQ_ASYNC_NOSPACE
	    !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
#else
	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
#endif
		return;
	wqueue = sk_sleep(sk);
	if (wqueue && waitqueue_active(wqueue))
		wake_up_interruptible_poll(
			wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
}

static void
kni_sk_destruct(struct sock *sk)
{
	struct kni_vhost_queue *q =
		container_of(sk, struct kni_vhost_queue, sk);

	if (!q)
		return;

	/* make sure there's no packet in buffer */
	while (skb_dequeue(&sk->sk_receive_queue) != NULL)
		;

	mb();

	if (q->fifo != NULL) {
		kfree(q->fifo);
		q->fifo = NULL;
	}

	if (q->cache != NULL) {
		kfree(q->cache);
		q->cache = NULL;
	}
}

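/*
 * Create the vhost backend for a KNI device: allocate the dummy struct sock
 * and socket, map the socket to a file descriptor (exported later via the
 * sock_fd sysfs attribute), pre-allocate RTE_KNI_VHOST_MAX_CACHE_SIZE skbs
 * plus the fifo that recycles them, and attach the queue to the kni device
 * before switching vq_status to BE_START.
 */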
static int
kni_vhost_backend_init(struct kni_dev *kni)
{
	struct kni_vhost_queue *q;
	struct net *net = current->nsproxy->net_ns;
	int err, i, sockfd;
	struct rte_kni_fifo *fifo;
	struct sk_buff *elem;

	if (kni->vhost_queue != NULL)
		return -1;

#ifdef HAVE_SK_ALLOC_KERN_PARAM
	q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
			&kni_raw_proto, 0);
#else
	q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
			&kni_raw_proto);
#endif
	if (!q)
		return -ENOMEM;

	err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock);
	if (err)
		goto free_sk;

	sockfd = kni_sock_map_fd(q->sock);
	if (sockfd < 0) {
		err = sockfd;
		goto free_sock;
	}

	/* cache init */
	q->cache = kzalloc(
		RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
		GFP_KERNEL);
	if (!q->cache)
		goto free_fd;

	fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *)
			+ sizeof(struct rte_kni_fifo), GFP_KERNEL);
	if (!fifo)
		goto free_cache;

	kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE);

	for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
		elem = &q->cache[i];
		kni_fifo_put(fifo, (void **)&elem, 1);
	}
	q->fifo = fifo;

	/* store sockfd in vhost_queue */
	q->sockfd = sockfd;

	/* init socket */
	q->sock->type = SOCK_RAW;
	q->sock->state = SS_CONNECTED;
	q->sock->ops = &kni_socket_ops;
	sock_init_data(q->sock, &q->sk);

	/* init sock data */
	q->sk.sk_write_space = kni_sk_write_space;
	q->sk.sk_destruct = kni_sk_destruct;
	q->flags = IFF_NO_PI | IFF_TAP;
	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
#ifdef RTE_KNI_VHOST_VNET_HDR_EN
	q->flags |= IFF_VNET_HDR;
#endif

	/* bind kni_dev with vhost_queue */
	q->kni = kni;
	kni->vhost_queue = q;

	wmb();

	kni->vq_status = BE_START;

#ifdef HAVE_SOCKET_WQ
	pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx",
		  q->sockfd, (uint64_t)q->sock->wq,
		  (uint64_t)q->sk.sk_wq);
#else
	pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx",
		  q->sockfd, (uint64_t)&q->sock->wait,
		  (uint64_t)q->sk.sk_sleep);
#endif

	return 0;

free_cache:
	kfree(q->cache);
	q->cache = NULL;

free_fd:
	put_unused_fd(sockfd);

free_sock:
	q->kni = NULL;
	kni->vhost_queue = NULL;
	kni->vq_status |= BE_FINISH;
	sock_release(q->sock);
	q->sock->ops = NULL;
	q->sock = NULL;

free_sk:
	sk_free((struct sock *)q);

	return err;
}

/* kni vhost sock sysfs */
static ssize_t
show_sock_fd(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct net_device *net_dev = container_of(dev, struct net_device, dev);
	struct kni_dev *kni = netdev_priv(net_dev);
	int sockfd = -1;

	if (kni->vhost_queue != NULL)
		sockfd = kni->vhost_queue->sockfd;
	return snprintf(buf, 10, "%d\n", sockfd);
}

static ssize_t
show_sock_en(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct net_device *net_dev = container_of(dev, struct net_device, dev);
	struct kni_dev *kni = netdev_priv(net_dev);

	return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
}

static ssize_t
set_sock_en(struct device *dev, struct device_attribute *attr,
	      const char *buf, size_t count)
{
	struct net_device *net_dev = container_of(dev, struct net_device, dev);
	struct kni_dev *kni = netdev_priv(net_dev);
	unsigned long en;
	int err = 0;

	if (kstrtoul(buf, 0, &en) != 0)
		return -EINVAL;

	if (en)
		err = kni_vhost_backend_init(kni);

	return err ? err : count;
}

static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL);
static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
static struct attribute *dev_attrs[] = {
	&dev_attr_sock_fd.attr,
	&dev_attr_sock_en.attr,
	NULL,
};

static const struct attribute_group dev_attr_grp = {
	.attrs = dev_attrs,
};
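/*
 * The two attributes above are the user-space control interface of this
 * backend.  Assuming a KNI interface named vEth0 and the usual sysfs layout
 * for net devices, a typical sequence would be:
 *
 *   echo 1 > /sys/class/net/vEth0/sock_en   # create the vhost backend
 *   cat /sys/class/net/vEth0/sock_fd        # fd consumed by the vhost user
 */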

int
kni_vhost_backend_release(struct kni_dev *kni)
{
	struct kni_vhost_queue *q = kni->vhost_queue;

	if (q == NULL)
		return 0;

	/* detach from kni */
	q->kni = NULL;

	pr_debug("release backend done\n");

	return 0;
}

int
kni_vhost_init(struct kni_dev *kni)
{
	struct net_device *dev = kni->net_dev;

	if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp))
		sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);

	kni->vq_status = BE_STOP;

	pr_debug("kni_vhost_init done\n");

	return 0;
}