1/*-
2 *   BSD LICENSE
3 *
4 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 *   All rights reserved.
6 *
7 *   Redistribution and use in source and binary forms, with or without
8 *   modification, are permitted provided that the following conditions
9 *   are met:
10 *
11 *     * Redistributions of source code must retain the above copyright
12 *       notice, this list of conditions and the following disclaimer.
13 *     * Redistributions in binary form must reproduce the above copyright
14 *       notice, this list of conditions and the following disclaimer in
15 *       the documentation and/or other materials provided with the
16 *       distribution.
17 *     * Neither the name of Intel Corporation nor the names of its
18 *       contributors may be used to endorse or promote products derived
19 *       from this software without specific prior written permission.
20 *
21 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include <stdint.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <sys/socket.h>
38#include <sys/time.h>
39#include <sys/types.h>
40#include <unistd.h>
41#include <string.h>
42
43#include <rte_common.h>
44#include <rte_log.h>
45
46#include "fd_man.h"
47
48#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
49
50static int
51get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
52{
53	int i;
54
55	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
56		;
57
58	return i;
59}
60
61static void
62fdset_move(struct fdset *pfdset, int dst, int src)
63{
64	pfdset->fd[dst]    = pfdset->fd[src];
65	pfdset->rwfds[dst] = pfdset->rwfds[src];
66}
67
68/*
69 * Find deleted fd entries and remove them
70 */
71static void
72fdset_shrink(struct fdset *pfdset)
73{
74	int i;
75	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
76
77	pthread_mutex_lock(&pfdset->fd_mutex);
78
79	for (i = 0; i < last_valid_idx; i++) {
80		if (pfdset->fd[i].fd != -1)
81			continue;
82
83		fdset_move(pfdset, i, last_valid_idx);
84		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
85	}
86	pfdset->num = last_valid_idx + 1;
87
88	pthread_mutex_unlock(&pfdset->fd_mutex);
89}
90
91/**
92 * Returns the index in the fdset for a given fd.
93 * @return
94 *   index for the fd, or -1 if fd isn't in the fdset.
95 */
96static int
97fdset_find_fd(struct fdset *pfdset, int fd)
98{
99	int i;
100
101	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
102		;
103
104	return i == pfdset->num ? -1 : i;
105}
106
107static void
108fdset_add_fd(struct fdset *pfdset, int idx, int fd,
109	fd_cb rcb, fd_cb wcb, void *dat)
110{
111	struct fdentry *pfdentry = &pfdset->fd[idx];
112	struct pollfd *pfd = &pfdset->rwfds[idx];
113
114	pfdentry->fd  = fd;
115	pfdentry->rcb = rcb;
116	pfdentry->wcb = wcb;
117	pfdentry->dat = dat;
118
119	pfd->fd = fd;
120	pfd->events  = rcb ? POLLIN : 0;
121	pfd->events |= wcb ? POLLOUT : 0;
122	pfd->revents = 0;
123}
124
125void
126fdset_init(struct fdset *pfdset)
127{
128	int i;
129
130	if (pfdset == NULL)
131		return;
132
133	for (i = 0; i < MAX_FDS; i++) {
134		pfdset->fd[i].fd = -1;
135		pfdset->fd[i].dat = NULL;
136	}
137	pfdset->num = 0;
138}
139
140/**
141 * Register the fd in the fdset with read/write handler and context.
142 */
143int
144fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
145{
146	int i;
147
148	if (pfdset == NULL || fd == -1)
149		return -1;
150
151	pthread_mutex_lock(&pfdset->fd_mutex);
152	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
153	if (i == -1) {
154		pthread_mutex_unlock(&pfdset->fd_mutex);
155		return -2;
156	}
157
158	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
159	pthread_mutex_unlock(&pfdset->fd_mutex);
160
161	return 0;
162}
163
164/**
165 *  Unregister the fd from the fdset.
166 *  Returns context of a given fd or NULL.
167 */
168void *
169fdset_del(struct fdset *pfdset, int fd)
170{
171	int i;
172	void *dat = NULL;
173
174	if (pfdset == NULL || fd == -1)
175		return NULL;
176
177	do {
178		pthread_mutex_lock(&pfdset->fd_mutex);
179
180		i = fdset_find_fd(pfdset, fd);
181		if (i != -1 && pfdset->fd[i].busy == 0) {
182			/* busy indicates r/wcb is executing! */
183			dat = pfdset->fd[i].dat;
184			pfdset->fd[i].fd = -1;
185			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
186			pfdset->fd[i].dat = NULL;
187			i = -1;
188		}
189		pthread_mutex_unlock(&pfdset->fd_mutex);
190	} while (i != -1);
191
192	return dat;
193}
194
195/**
196 *  Unregister the fd from the fdset.
197 *
198 *  If parameters are invalid, return directly -2.
199 *  And check whether fd is busy, if yes, return -1.
200 *  Otherwise, try to delete the fd from fdset and
201 *  return true.
202 */
203int
204fdset_try_del(struct fdset *pfdset, int fd)
205{
206	int i;
207
208	if (pfdset == NULL || fd == -1)
209		return -2;
210
211	pthread_mutex_lock(&pfdset->fd_mutex);
212	i = fdset_find_fd(pfdset, fd);
213	if (i != -1 && pfdset->fd[i].busy) {
214		pthread_mutex_unlock(&pfdset->fd_mutex);
215		return -1;
216	}
217
218	if (i != -1) {
219		pfdset->fd[i].fd = -1;
220		pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
221		pfdset->fd[i].dat = NULL;
222	}
223
224	pthread_mutex_unlock(&pfdset->fd_mutex);
225	return 0;
226}
227
228/**
229 * This functions runs in infinite blocking loop until there is no fd in
230 * pfdset. It calls corresponding r/w handler if there is event on the fd.
231 *
232 * Before the callback is called, we set the flag to busy status; If other
233 * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
234 * will wait until the flag is reset to zero(which indicates the callback is
235 * finished), then it could free the context after fdset_del.
236 */
237void
238fdset_event_dispatch(struct fdset *pfdset)
239{
240	int i;
241	struct pollfd *pfd;
242	struct fdentry *pfdentry;
243	fd_cb rcb, wcb;
244	void *dat;
245	int fd, numfds;
246	int remove1, remove2;
247	int need_shrink;
248
249	if (pfdset == NULL)
250		return;
251
252	while (1) {
253
254		/*
255		 * When poll is blocked, other threads might unregister
256		 * listenfds from and register new listenfds into fdset.
257		 * When poll returns, the entries for listenfds in the fdset
258		 * might have been updated. It is ok if there is unwanted call
259		 * for new listenfds.
260		 */
261		pthread_mutex_lock(&pfdset->fd_mutex);
262		numfds = pfdset->num;
263		pthread_mutex_unlock(&pfdset->fd_mutex);
264
265		poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
266
267		need_shrink = 0;
268		for (i = 0; i < numfds; i++) {
269			pthread_mutex_lock(&pfdset->fd_mutex);
270
271			pfdentry = &pfdset->fd[i];
272			fd = pfdentry->fd;
273			pfd = &pfdset->rwfds[i];
274
275			if (fd < 0) {
276				need_shrink = 1;
277				pthread_mutex_unlock(&pfdset->fd_mutex);
278				continue;
279			}
280
281			if (!pfd->revents) {
282				pthread_mutex_unlock(&pfdset->fd_mutex);
283				continue;
284			}
285
286			remove1 = remove2 = 0;
287
288			rcb = pfdentry->rcb;
289			wcb = pfdentry->wcb;
290			dat = pfdentry->dat;
291			pfdentry->busy = 1;
292
293			pthread_mutex_unlock(&pfdset->fd_mutex);
294
295			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
296				rcb(fd, dat, &remove1);
297			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
298				wcb(fd, dat, &remove2);
299			pfdentry->busy = 0;
300			/*
301			 * fdset_del needs to check busy flag.
302			 * We don't allow fdset_del to be called in callback
303			 * directly.
304			 */
305			/*
306			 * When we are to clean up the fd from fdset,
307			 * because the fd is closed in the cb,
308			 * the old fd val could be reused by when creates new
309			 * listen fd in another thread, we couldn't call
310			 * fdset_del.
311			 */
312			if (remove1 || remove2) {
313				pfdentry->fd = -1;
314				need_shrink = 1;
315			}
316		}
317
318		if (need_shrink)
319			fdset_shrink(pfdset);
320	}
321}
322