1/*
2 Itay Marom
3 Cisco Systems, Inc.
4*/
5
6/*
7Copyright (c) 2015-2015 Cisco Systems, Inc.
8
9Licensed under the Apache License, Version 2.0 (the "License");
10you may not use this file except in compliance with the License.
11You may obtain a copy of the License at
12
13    http://www.apache.org/licenses/LICENSE-2.0
14
15Unless required by applicable law or agreed to in writing, software
16distributed under the License is distributed on an "AS IS" BASIS,
17WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18See the License for the specific language governing permissions and
19limitations under the License.
20*/
21
22#ifndef __TREX_WATCHDOG_H__
23#define __TREX_WATCHDOG_H__
24
25#include <string>
26#include <vector>
27#include <thread>
28#include <mutex>
29#include <assert.h>
30
31#include "mbuf.h"
32#include "os_time.h"
33
34/**
35 * every thread creates its own monitor from its own memory
36 *
37 * @author imarom (19-Jun-16)
38 */
39class TrexMonitor {
40    friend class TrexWatchDog;
41
42public:
43
44    /**
45    * create a monitor
46    *
47    * @author imarom (31-May-16)
48    *
49    * @param name
50    * @param timeout
51    *
52    * @return int
53    */
54    void create(const std::string &name, double timeout_sec);
55
56    /**
57     * disable the monitor for 'time_sec'
58     * by default it will disable it for a long period of time
59     * (forever)
60     *
61     */
62    void disable(dsec_t time_sec = 1e9) {
63        set_timeout(time_sec);
64    }
65
66    /**
67     * re-enable a monitor after it was disabled
68     *
69     */
70    void enable() {
71        set_timeout(m_base_timeout_sec);
72    }
73
74    /**
75     * not thread safe
76     * call from current thread only
77     */
78    void io_begin() {
79        /**
80         * holds a ref cnt
81         * a thread might start many IO operations
82         */
83        m_io_ref_cnt++;
84        set_timeout(IO_TIMEOUT_SEC);
85    }
86
87     /**
88     * not thread safe
89     * call from current thread only
90     */
91    void io_end() {
92        assert(m_io_ref_cnt > 0);
93        m_io_ref_cnt--;
94        if (m_io_ref_cnt == 0) {
95            set_timeout(m_base_timeout_sec);
96        }
97    }
98
99    /**
100     * tickle the monitor - this should be called from the thread
101     * to avoid the watchdog from detecting a stuck thread
102     *
103     * @author imarom (19-Jun-16)
104     */
105    void tickle() {
106        /* to avoid useless writes - first check */
107        if (!m_tickled) {
108            m_tickled = true;
109        }
110    }
111
112    const std::string &get_name() const {
113        return m_name;
114    }
115
116    /* return how much time has passed since last tickle */
117    dsec_t get_interval(dsec_t now) const {
118        return (now - m_ts);
119    }
120
121
122    dsec_t get_timeout_sec() const {
123        return m_timeout_sec;
124    }
125
126
127private:
128
129    /**
130     * called by the watchdog to reset the monitor for a new round
131     *
132     */
133    void reset(dsec_t now) {
134        m_tickled = false;
135        m_ts      = now;
136    }
137
138
139    pthread_t get_tid() const {
140        return m_tid;
141    }
142
143    volatile bool is_tickled() const {
144        return m_tickled;
145    }
146
147    bool is_expired(dsec_t now) const {
148        return ( get_interval(now) > m_timeout_sec );
149    }
150
151    void set_timeout(double timeout_sec) {
152        /* before changing timeout we MUST tickle and memory fence o.w the main thread might crash */
153        tickle();
154        asm volatile("mfence" ::: "memory");
155        m_timeout_sec = timeout_sec;
156    }
157
158
159    /* write fields are first */
160    volatile bool    m_tickled;
161    int              m_handle;
162    dsec_t           m_ts;
163    double           m_timeout_sec;
164    double           m_base_timeout_sec;
165    pthread_t        m_tid;
166    std::string      m_name;
167
168    uint32_t         m_io_ref_cnt;
169
170    static const int IO_TIMEOUT_SEC = 30;
171
172} __rte_cache_aligned;
173
174
175/**
176 * a watchdog is a list of registered monitors
177 *
178 * @author imarom (19-Jun-16)
179 */
180class TrexWatchDog {
181public:
182
183    /**
184     * singleton entry
185     *
186     * @author imarom (19-Jun-16)
187     *
188     * @return TrexWatchDog&
189     */
190    static TrexWatchDog& getInstance() {
191        static TrexWatchDog instance;
192
193        return instance;
194    }
195
196    class IOFunction;
197
198    void init(bool enable);
199
200    /**
201     * get monitor of current thread if registered
202     * (NULL if not registered)
203     *
204     */
205    TrexMonitor * get_current_monitor();
206
207    /**
208     * add a monitor to the watchdog
209     * from now on this monitor will be watched
210     *
211     * @author imarom (19-Jun-16)
212     *
213     * @param monitor - a pointer to the object
214     *
215     */
216    void register_monitor(TrexMonitor *monitor);
217
218
219    /**
220     * start the watchdog
221     *
222     */
223    void start();
224
225
226    /**
227     * stop the watchdog
228     *
229     */
230    void stop();
231
232
233private:
234
235    TrexWatchDog() {
236        m_thread        = NULL;
237        m_enable        = false;
238        m_active        = false;
239        m_mon_count     = 0;
240    }
241
242    void register_signal();
243    void _main();
244
245    static const int           MAX_MONITORS = 100;
246    TrexMonitor               *m_monitors[MAX_MONITORS];
247    volatile int               m_mon_count;
248    std::mutex                 m_lock;
249
250    bool                       m_enable;
251    volatile bool              m_active;
252    std::thread               *m_thread;
253
254    static bool                g_signal_init;
255};
256
257class TrexWatchDog::IOFunction {
258public:
259    static void io_begin() {
260        TrexMonitor * cur_monitor = TrexWatchDog::getInstance().get_current_monitor();
261        if (cur_monitor != NULL) {
262            cur_monitor->io_begin();
263        }
264    }
265
266    static void io_end() {
267        TrexMonitor * cur_monitor = TrexWatchDog::getInstance().get_current_monitor();
268        if (cur_monitor != NULL) {
269            cur_monitor->io_end();
270        }
271    }
272
273    IOFunction() {
274        IOFunction::io_begin();
275    }
276
277    ~IOFunction() {
278        IOFunction::io_end();
279    }
280
281};
282
283#endif /* __TREX_WATCHDOG_H__ */
284