trex_watchdog.h revision 9ad36b3d
1/*
2 Itay Marom
3 Cisco Systems, Inc.
4*/
5
6/*
7Copyright (c) 2015-2015 Cisco Systems, Inc.
8
9Licensed under the Apache License, Version 2.0 (the "License");
10you may not use this file except in compliance with the License.
11You may obtain a copy of the License at
12
13    http://www.apache.org/licenses/LICENSE-2.0
14
15Unless required by applicable law or agreed to in writing, software
16distributed under the License is distributed on an "AS IS" BASIS,
17WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18See the License for the specific language governing permissions and
19limitations under the License.
20*/
21
22#ifndef __TREX_WATCHDOG_H__
23#define __TREX_WATCHDOG_H__
24
25#include <string>
26#include <vector>
27#include <thread>
28#include <mutex>
29#include <assert.h>
30
31#include "mbuf.h"
32#include "os_time.h"
33
34/**
35 * every thread creates its own monitor from its own memory
36 *
37 * @author imarom (19-Jun-16)
38 */
39class TrexMonitor {
40    friend class TrexWatchDog;
41
42public:
43
44    /**
45    * create a monitor
46    *
47    * @author imarom (31-May-16)
48    *
49    * @param name
50    * @param timeout
51    *
52    * @return int
53    */
54    void create(const std::string &name, double timeout_sec);
55
56    /**
57     * disable the monitor for 'time_sec'
58     * by default it will disable it for a long period of time
59     * (forever)
60     *
61     */
62    void disable(dsec_t time_sec = 1e9) {
63        set_timeout(time_sec);
64    }
65
66    /**
67     * re-enable a monitor after it was disabled
68     *
69     */
70    void enable() {
71        set_timeout(m_base_timeout_sec);
72    }
73
74    /**
75     * not thread safe
76     * call from current thread only
77     */
78    void io_begin() {
79        /**
80         * holds a ref cnt
81         * a thread might start many IO operations
82         */
83        m_io_ref_cnt++;
84        set_timeout(IO_TIMEOUT_SEC);
85    }
86
87     /**
88     * not thread safe
89     * call from current thread only
90     */
91    void io_end() {
92        assert(m_io_ref_cnt > 0);
93        m_io_ref_cnt--;
94        if (m_io_ref_cnt == 0) {
95            set_timeout(m_base_timeout_sec);
96        }
97    }
98
99    /**
100     * tickle the monitor - this should be called from the thread
101     * to avoid the watchdog from detecting a stuck thread
102     *
103     * @author imarom (19-Jun-16)
104     */
105    void tickle() {
106        /* to avoid useless writes - first check */
107        if (!m_tickled) {
108            m_tickled = true;
109        }
110    }
111
112    const std::string &get_name() const {
113        return m_name;
114    }
115
116    /* return how much time has passed since last tickle */
117    dsec_t get_interval(dsec_t now) const {
118        return (now - m_ts);
119    }
120
121
122    dsec_t get_timeout_sec() const {
123        return m_timeout_sec;
124    }
125
126
127private:
128
129    /**
130     * called by the watchdog to reset the monitor for a new round
131     *
132     */
133    void reset(dsec_t now) {
134        m_tickled = false;
135        m_ts      = now;
136    }
137
138
139    pthread_t get_tid() const {
140        return m_tid;
141    }
142
143    volatile bool is_tickled() const {
144        return m_tickled;
145    }
146
147    bool is_expired(dsec_t now) const {
148        return ( get_interval(now) > m_timeout_sec );
149    }
150
151    void set_timeout(double timeout_sec) {
152        /* before changing timeout we MUST tickle and memory fence o.w the main thread might crash */
153        tickle();
154        asm volatile("mfence" ::: "memory");
155        m_timeout_sec = timeout_sec;
156    }
157
158
159    /* write fields are first */
160    volatile bool    m_tickled;
161    int              m_handle;
162    dsec_t           m_ts;
163    double           m_timeout_sec;
164    double           m_base_timeout_sec;
165    pthread_t        m_tid;
166    std::string      m_name;
167
168    uint32_t         m_io_ref_cnt;
169
170    static const int IO_TIMEOUT_SEC = 30;
171
172} __rte_cache_aligned;
173
174
175/**
176 * a watchdog is a list of registered monitors
177 *
178 * @author imarom (19-Jun-16)
179 */
180class TrexWatchDog {
181public:
182
183    /**
184     * singleton entry
185     *
186     * @author imarom (19-Jun-16)
187     *
188     * @return TrexWatchDog&
189     */
190    static TrexWatchDog& getInstance() {
191        static TrexWatchDog instance;
192
193        return instance;
194    }
195
196    void init(bool enable);
197
198    /**
199     * get monitor of current thread if registered
200     * (NULL if not registered)
201     *
202     */
203    TrexMonitor * get_current_monitor();
204
205    /**
206     * add a monitor to the watchdog
207     * from now on this monitor will be watched
208     *
209     * @author imarom (19-Jun-16)
210     *
211     * @param monitor - a pointer to the object
212     *
213     */
214    void register_monitor(TrexMonitor *monitor);
215
216
217    /**
218     * start the watchdog
219     *
220     */
221    void start();
222
223
224    /**
225     * stop the watchdog
226     *
227     */
228    void stop();
229
230
231private:
232
233    TrexWatchDog() {
234        m_thread        = NULL;
235        m_enable        = false;
236        m_active        = false;
237        m_mon_count     = 0;
238    }
239
240    void register_signal();
241    void _main();
242
243    static const int           MAX_MONITORS = 100;
244    TrexMonitor               *m_monitors[MAX_MONITORS];
245    volatile int               m_mon_count;
246    std::mutex                 m_lock;
247
248    bool                       m_enable;
249    volatile bool              m_active;
250    std::thread               *m_thread;
251
252    static bool                g_signal_init;
253};
254
255
256#endif /* __TREX_WATCHDOG_H__ */
257