1
2/*
3 * Copyright (C) Roman Arutyunyan
4 * Copyright (C) Nginx, Inc.
5 */
6
7
8#include <ngx_config.h>
9#include <ngx_core.h>
10#include <ngx_stream.h>
11
12
/*
 * One point of the consistent hash ring: a hash value and the "server"
 * string of the peer the point was generated for.
 */
typedef struct {
    uint32_t                              hash;
    ngx_str_t                            *server;
} ngx_stream_upstream_chash_point_t;
17
18
/*
 * The consistent hash ring.  "number" is the count of valid entries in
 * "point"; the array is declared with one element, the remaining ones are
 * allocated right after the structure when the ring is built.
 */
typedef struct {
    ngx_uint_t                            number;
    ngx_stream_upstream_chash_point_t     point[1];
} ngx_stream_upstream_chash_points_t;
23
24
/*
 * Per-upstream configuration: the compiled hash key expression and,
 * for the consistent method only, the ring of points (NULL otherwise).
 */
typedef struct {
    ngx_stream_complex_value_t            key;
    ngx_stream_upstream_chash_points_t   *points;
} ngx_stream_upstream_hash_srv_conf_t;
29
30
/*
 * Per-session balancer state.
 *
 *   key          evaluated value of the hash key for this session
 *   tries        number of unsuccessful selection attempts made so far
 *   rehash       counter prepended to the key on repeated plain hashing
 *   hash         plain method: accumulated hash value;
 *                consistent method: index of the current ring point
 *   get_rr_peer  round robin selector used as a fallback
 */
typedef struct {
    /* the round robin data must be first */
    ngx_stream_upstream_rr_peer_data_t    rrp;
    ngx_stream_upstream_hash_srv_conf_t  *conf;
    ngx_str_t                             key;
    ngx_uint_t                            tries;
    ngx_uint_t                            rehash;
    uint32_t                              hash;
    ngx_event_get_peer_pt                 get_rr_peer;
} ngx_stream_upstream_hash_peer_data_t;
41
42
/* plain hash method */
static ngx_int_t ngx_stream_upstream_init_hash(ngx_conf_t *cf,
    ngx_stream_upstream_srv_conf_t *us);
static ngx_int_t ngx_stream_upstream_init_hash_peer(ngx_stream_session_t *s,
    ngx_stream_upstream_srv_conf_t *us);
static ngx_int_t ngx_stream_upstream_get_hash_peer(ngx_peer_connection_t *pc,
    void *data);

/* consistent hash method */
static ngx_int_t ngx_stream_upstream_init_chash(ngx_conf_t *cf,
    ngx_stream_upstream_srv_conf_t *us);
static int ngx_libc_cdecl
    ngx_stream_upstream_chash_cmp_points(const void *one, const void *two);
static ngx_uint_t ngx_stream_upstream_find_chash_point(
    ngx_stream_upstream_chash_points_t *points, uint32_t hash);
static ngx_int_t ngx_stream_upstream_init_chash_peer(ngx_stream_session_t *s,
    ngx_stream_upstream_srv_conf_t *us);
static ngx_int_t ngx_stream_upstream_get_chash_peer(ngx_peer_connection_t *pc,
    void *data);

/* configuration */
static void *ngx_stream_upstream_hash_create_conf(ngx_conf_t *cf);
static char *ngx_stream_upstream_hash(ngx_conf_t *cf, ngx_command_t *cmd,
    void *conf);
64
65
/*
 * Module directives: "hash <key> [consistent]", handled by
 * ngx_stream_upstream_hash().
 */
static ngx_command_t  ngx_stream_upstream_hash_commands[] = {

    { ngx_string("hash"),
      NGX_STREAM_UPS_CONF|NGX_CONF_TAKE12,
      ngx_stream_upstream_hash,
      NGX_STREAM_SRV_CONF_OFFSET,
      0,
      NULL },

      ngx_null_command
};
77
78
/*
 * Stream module context: only the server configuration constructor
 * is provided, all other hooks are unused.
 */
static ngx_stream_module_t  ngx_stream_upstream_hash_module_ctx = {
    NULL,                                  /* preconfiguration */
    NULL,                                  /* postconfiguration */

    NULL,                                  /* create main configuration */
    NULL,                                  /* init main configuration */

    ngx_stream_upstream_hash_create_conf,  /* create server configuration */
    NULL                                   /* merge server configuration */
};
89
90
/*
 * Module definition: ties the context and the directive table together;
 * no process or thread lifecycle hooks are used.
 */
ngx_module_t  ngx_stream_upstream_hash_module = {
    NGX_MODULE_V1,
    &ngx_stream_upstream_hash_module_ctx,  /* module context */
    ngx_stream_upstream_hash_commands,     /* module directives */
    NGX_STREAM_MODULE,                     /* module type */
    NULL,                                  /* init master */
    NULL,                                  /* init module */
    NULL,                                  /* init process */
    NULL,                                  /* init thread */
    NULL,                                  /* exit thread */
    NULL,                                  /* exit process */
    NULL,                                  /* exit master */
    NGX_MODULE_V1_PADDING
};
105
106
107static ngx_int_t
108ngx_stream_upstream_init_hash(ngx_conf_t *cf,
109    ngx_stream_upstream_srv_conf_t *us)
110{
111    if (ngx_stream_upstream_init_round_robin(cf, us) != NGX_OK) {
112        return NGX_ERROR;
113    }
114
115    us->peer.init = ngx_stream_upstream_init_hash_peer;
116
117    return NGX_OK;
118}
119
120
121static ngx_int_t
122ngx_stream_upstream_init_hash_peer(ngx_stream_session_t *s,
123    ngx_stream_upstream_srv_conf_t *us)
124{
125    ngx_stream_upstream_hash_srv_conf_t   *hcf;
126    ngx_stream_upstream_hash_peer_data_t  *hp;
127
128    hp = ngx_palloc(s->connection->pool,
129                    sizeof(ngx_stream_upstream_hash_peer_data_t));
130    if (hp == NULL) {
131        return NGX_ERROR;
132    }
133
134    s->upstream->peer.data = &hp->rrp;
135
136    if (ngx_stream_upstream_init_round_robin_peer(s, us) != NGX_OK) {
137        return NGX_ERROR;
138    }
139
140    s->upstream->peer.get = ngx_stream_upstream_get_hash_peer;
141
142    hcf = ngx_stream_conf_upstream_srv_conf(us,
143                                            ngx_stream_upstream_hash_module);
144
145    if (ngx_stream_complex_value(s, &hcf->key, &hp->key) != NGX_OK) {
146        return NGX_ERROR;
147    }
148
149    ngx_log_debug1(NGX_LOG_DEBUG_STREAM, s->connection->log, 0,
150                   "upstream hash key:\"%V\"", &hp->key);
151
152    hp->conf = hcf;
153    hp->tries = 0;
154    hp->rehash = 0;
155    hp->hash = 0;
156    hp->get_rr_peer = ngx_stream_upstream_get_round_robin_peer;
157
158    return NGX_OK;
159}
160
161
162static ngx_int_t
163ngx_stream_upstream_get_hash_peer(ngx_peer_connection_t *pc, void *data)
164{
165    ngx_stream_upstream_hash_peer_data_t *hp = data;
166
167    time_t                          now;
168    u_char                          buf[NGX_INT_T_LEN];
169    size_t                          size;
170    uint32_t                        hash;
171    ngx_int_t                       w;
172    uintptr_t                       m;
173    ngx_uint_t                      n, p;
174    ngx_stream_upstream_rr_peer_t  *peer;
175
176    ngx_log_debug1(NGX_LOG_DEBUG_STREAM, pc->log, 0,
177                   "get hash peer, try: %ui", pc->tries);
178
179    ngx_stream_upstream_rr_peers_wlock(hp->rrp.peers);
180
181    if (hp->tries > 20 || hp->rrp.peers->single) {
182        ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
183        return hp->get_rr_peer(pc, &hp->rrp);
184    }
185
186    now = ngx_time();
187
188    pc->connection = NULL;
189
190    for ( ;; ) {
191
192        /*
193         * Hash expression is compatible with Cache::Memcached:
194         * ((crc32([REHASH] KEY) >> 16) & 0x7fff) + PREV_HASH
195         * with REHASH omitted at the first iteration.
196         */
197
198        ngx_crc32_init(hash);
199
200        if (hp->rehash > 0) {
201            size = ngx_sprintf(buf, "%ui", hp->rehash) - buf;
202            ngx_crc32_update(&hash, buf, size);
203        }
204
205        ngx_crc32_update(&hash, hp->key.data, hp->key.len);
206        ngx_crc32_final(hash);
207
208        hash = (hash >> 16) & 0x7fff;
209
210        hp->hash += hash;
211        hp->rehash++;
212
213        w = hp->hash % hp->rrp.peers->total_weight;
214        peer = hp->rrp.peers->peer;
215        p = 0;
216
217        while (w >= peer->weight) {
218            w -= peer->weight;
219            peer = peer->next;
220            p++;
221        }
222
223        n = p / (8 * sizeof(uintptr_t));
224        m = (uintptr_t) 1 << p % (8 * sizeof(uintptr_t));
225
226        if (hp->rrp.tried[n] & m) {
227            goto next;
228        }
229
230        ngx_log_debug2(NGX_LOG_DEBUG_STREAM, pc->log, 0,
231                       "get hash peer, value:%uD, peer:%ui", hp->hash, p);
232
233        if (peer->down) {
234            goto next;
235        }
236
237        if (peer->max_fails
238            && peer->fails >= peer->max_fails
239            && now - peer->checked <= peer->fail_timeout)
240        {
241            goto next;
242        }
243
244        if (peer->max_conns && peer->conns >= peer->max_conns) {
245            goto next;
246        }
247
248        break;
249
250    next:
251
252        if (++hp->tries > 20) {
253            ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
254            return hp->get_rr_peer(pc, &hp->rrp);
255        }
256    }
257
258    hp->rrp.current = peer;
259
260    pc->sockaddr = peer->sockaddr;
261    pc->socklen = peer->socklen;
262    pc->name = &peer->name;
263
264    peer->conns++;
265
266    if (now - peer->checked > peer->fail_timeout) {
267        peer->checked = now;
268    }
269
270    ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
271
272    hp->rrp.tried[n] |= m;
273
274    return NGX_OK;
275}
276
277
278static ngx_int_t
279ngx_stream_upstream_init_chash(ngx_conf_t *cf,
280    ngx_stream_upstream_srv_conf_t *us)
281{
282    u_char                               *host, *port, c;
283    size_t                                host_len, port_len, size;
284    uint32_t                              hash, base_hash;
285    ngx_str_t                            *server;
286    ngx_uint_t                            npoints, i, j;
287    ngx_stream_upstream_rr_peer_t        *peer;
288    ngx_stream_upstream_rr_peers_t       *peers;
289    ngx_stream_upstream_chash_points_t   *points;
290    ngx_stream_upstream_hash_srv_conf_t  *hcf;
291    union {
292        uint32_t                          value;
293        u_char                            byte[4];
294    } prev_hash;
295
296    if (ngx_stream_upstream_init_round_robin(cf, us) != NGX_OK) {
297        return NGX_ERROR;
298    }
299
300    us->peer.init = ngx_stream_upstream_init_chash_peer;
301
302    peers = us->peer.data;
303    npoints = peers->total_weight * 160;
304
305    size = sizeof(ngx_stream_upstream_chash_points_t)
306           + sizeof(ngx_stream_upstream_chash_point_t) * (npoints - 1);
307
308    points = ngx_palloc(cf->pool, size);
309    if (points == NULL) {
310        return NGX_ERROR;
311    }
312
313    points->number = 0;
314
315    for (peer = peers->peer; peer; peer = peer->next) {
316        server = &peer->server;
317
318        /*
319         * Hash expression is compatible with Cache::Memcached::Fast:
320         * crc32(HOST \0 PORT PREV_HASH).
321         */
322
323        if (server->len >= 5
324            && ngx_strncasecmp(server->data, (u_char *) "unix:", 5) == 0)
325        {
326            host = server->data + 5;
327            host_len = server->len - 5;
328            port = NULL;
329            port_len = 0;
330            goto done;
331        }
332
333        for (j = 0; j < server->len; j++) {
334            c = server->data[server->len - j - 1];
335
336            if (c == ':') {
337                host = server->data;
338                host_len = server->len - j - 1;
339                port = server->data + server->len - j;
340                port_len = j;
341                goto done;
342            }
343
344            if (c < '0' || c > '9') {
345                break;
346            }
347        }
348
349        host = server->data;
350        host_len = server->len;
351        port = NULL;
352        port_len = 0;
353
354    done:
355
356        ngx_crc32_init(base_hash);
357        ngx_crc32_update(&base_hash, host, host_len);
358        ngx_crc32_update(&base_hash, (u_char *) "", 1);
359        ngx_crc32_update(&base_hash, port, port_len);
360
361        prev_hash.value = 0;
362        npoints = peer->weight * 160;
363
364        for (j = 0; j < npoints; j++) {
365            hash = base_hash;
366
367            ngx_crc32_update(&hash, prev_hash.byte, 4);
368            ngx_crc32_final(hash);
369
370            points->point[points->number].hash = hash;
371            points->point[points->number].server = server;
372            points->number++;
373
374#if (NGX_HAVE_LITTLE_ENDIAN)
375            prev_hash.value = hash;
376#else
377            prev_hash.byte[0] = (u_char) (hash & 0xff);
378            prev_hash.byte[1] = (u_char) ((hash >> 8) & 0xff);
379            prev_hash.byte[2] = (u_char) ((hash >> 16) & 0xff);
380            prev_hash.byte[3] = (u_char) ((hash >> 24) & 0xff);
381#endif
382        }
383    }
384
385    ngx_qsort(points->point,
386              points->number,
387              sizeof(ngx_stream_upstream_chash_point_t),
388              ngx_stream_upstream_chash_cmp_points);
389
390    for (i = 0, j = 1; j < points->number; j++) {
391        if (points->point[i].hash != points->point[j].hash) {
392            points->point[++i] = points->point[j];
393        }
394    }
395
396    points->number = i + 1;
397
398    hcf = ngx_stream_conf_upstream_srv_conf(us,
399                                            ngx_stream_upstream_hash_module);
400    hcf->points = points;
401
402    return NGX_OK;
403}
404
405
406static int ngx_libc_cdecl
407ngx_stream_upstream_chash_cmp_points(const void *one, const void *two)
408{
409    ngx_stream_upstream_chash_point_t *first =
410                                     (ngx_stream_upstream_chash_point_t *) one;
411    ngx_stream_upstream_chash_point_t *second =
412                                     (ngx_stream_upstream_chash_point_t *) two;
413
414    if (first->hash < second->hash) {
415        return -1;
416
417    } else if (first->hash > second->hash) {
418        return 1;
419
420    } else {
421        return 0;
422    }
423}
424
425
426static ngx_uint_t
427ngx_stream_upstream_find_chash_point(ngx_stream_upstream_chash_points_t *points,
428    uint32_t hash)
429{
430    ngx_uint_t                          i, j, k;
431    ngx_stream_upstream_chash_point_t  *point;
432
433    /* find first point >= hash */
434
435    point = &points->point[0];
436
437    i = 0;
438    j = points->number;
439
440    while (i < j) {
441        k = (i + j) / 2;
442
443        if (hash > point[k].hash) {
444            i = k + 1;
445
446        } else if (hash < point[k].hash) {
447            j = k;
448
449        } else {
450            return k;
451        }
452    }
453
454    return i;
455}
456
457
458static ngx_int_t
459ngx_stream_upstream_init_chash_peer(ngx_stream_session_t *s,
460    ngx_stream_upstream_srv_conf_t *us)
461{
462    uint32_t                               hash;
463    ngx_stream_upstream_hash_srv_conf_t   *hcf;
464    ngx_stream_upstream_hash_peer_data_t  *hp;
465
466    if (ngx_stream_upstream_init_hash_peer(s, us) != NGX_OK) {
467        return NGX_ERROR;
468    }
469
470    s->upstream->peer.get = ngx_stream_upstream_get_chash_peer;
471
472    hp = s->upstream->peer.data;
473    hcf = ngx_stream_conf_upstream_srv_conf(us,
474                                            ngx_stream_upstream_hash_module);
475
476    hash = ngx_crc32_long(hp->key.data, hp->key.len);
477
478    ngx_stream_upstream_rr_peers_rlock(hp->rrp.peers);
479
480    hp->hash = ngx_stream_upstream_find_chash_point(hcf->points, hash);
481
482    ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
483
484    return NGX_OK;
485}
486
487
488static ngx_int_t
489ngx_stream_upstream_get_chash_peer(ngx_peer_connection_t *pc, void *data)
490{
491    ngx_stream_upstream_hash_peer_data_t *hp = data;
492
493    time_t                                now;
494    intptr_t                              m;
495    ngx_str_t                            *server;
496    ngx_int_t                             total;
497    ngx_uint_t                            i, n, best_i;
498    ngx_stream_upstream_rr_peer_t        *peer, *best;
499    ngx_stream_upstream_chash_point_t    *point;
500    ngx_stream_upstream_chash_points_t   *points;
501    ngx_stream_upstream_hash_srv_conf_t  *hcf;
502
503    ngx_log_debug1(NGX_LOG_DEBUG_STREAM, pc->log, 0,
504                   "get consistent hash peer, try: %ui", pc->tries);
505
506    ngx_stream_upstream_rr_peers_wlock(hp->rrp.peers);
507
508    pc->connection = NULL;
509
510    now = ngx_time();
511    hcf = hp->conf;
512
513    points = hcf->points;
514    point = &points->point[0];
515
516    for ( ;; ) {
517        server = point[hp->hash % points->number].server;
518
519        ngx_log_debug2(NGX_LOG_DEBUG_STREAM, pc->log, 0,
520                       "consistent hash peer:%uD, server:\"%V\"",
521                       hp->hash, server);
522
523        best = NULL;
524        best_i = 0;
525        total = 0;
526
527        for (peer = hp->rrp.peers->peer, i = 0;
528             peer;
529             peer = peer->next, i++)
530        {
531            n = i / (8 * sizeof(uintptr_t));
532            m = (uintptr_t) 1 << i % (8 * sizeof(uintptr_t));
533
534            if (hp->rrp.tried[n] & m) {
535                continue;
536            }
537
538            if (peer->down) {
539                continue;
540            }
541
542            if (peer->server.len != server->len
543                || ngx_strncmp(peer->server.data, server->data, server->len)
544                   != 0)
545            {
546                continue;
547            }
548
549            if (peer->max_fails
550                && peer->fails >= peer->max_fails
551                && now - peer->checked <= peer->fail_timeout)
552            {
553                continue;
554            }
555
556            if (peer->max_conns && peer->conns >= peer->max_conns) {
557                continue;
558            }
559
560            peer->current_weight += peer->effective_weight;
561            total += peer->effective_weight;
562
563            if (peer->effective_weight < peer->weight) {
564                peer->effective_weight++;
565            }
566
567            if (best == NULL || peer->current_weight > best->current_weight) {
568                best = peer;
569                best_i = i;
570            }
571        }
572
573        if (best) {
574            best->current_weight -= total;
575            break;
576        }
577
578        hp->hash++;
579        hp->tries++;
580
581        if (hp->tries >= points->number) {
582            pc->name = hp->rrp.peers->name;
583            ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
584            return NGX_BUSY;
585        }
586    }
587
588    hp->rrp.current = best;
589
590    pc->sockaddr = best->sockaddr;
591    pc->socklen = best->socklen;
592    pc->name = &best->name;
593
594    best->conns++;
595
596    if (now - best->checked > best->fail_timeout) {
597        best->checked = now;
598    }
599
600    ngx_stream_upstream_rr_peers_unlock(hp->rrp.peers);
601
602    n = best_i / (8 * sizeof(uintptr_t));
603    m = (uintptr_t) 1 << best_i % (8 * sizeof(uintptr_t));
604
605    hp->rrp.tried[n] |= m;
606
607    return NGX_OK;
608}
609
610
611static void *
612ngx_stream_upstream_hash_create_conf(ngx_conf_t *cf)
613{
614    ngx_stream_upstream_hash_srv_conf_t  *conf;
615
616    conf = ngx_palloc(cf->pool, sizeof(ngx_stream_upstream_hash_srv_conf_t));
617    if (conf == NULL) {
618        return NULL;
619    }
620
621    conf->points = NULL;
622
623    return conf;
624}
625
626
627static char *
628ngx_stream_upstream_hash(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
629{
630    ngx_stream_upstream_hash_srv_conf_t  *hcf = conf;
631
632    ngx_str_t                           *value;
633    ngx_stream_upstream_srv_conf_t      *uscf;
634    ngx_stream_compile_complex_value_t   ccv;
635
636    value = cf->args->elts;
637
638    ngx_memzero(&ccv, sizeof(ngx_stream_compile_complex_value_t));
639
640    ccv.cf = cf;
641    ccv.value = &value[1];
642    ccv.complex_value = &hcf->key;
643
644    if (ngx_stream_compile_complex_value(&ccv) != NGX_OK) {
645        return NGX_CONF_ERROR;
646    }
647
648    uscf = ngx_stream_conf_get_module_srv_conf(cf, ngx_stream_upstream_module);
649
650    if (uscf->peer.init_upstream) {
651        ngx_conf_log_error(NGX_LOG_WARN, cf, 0,
652                           "load balancing method redefined");
653    }
654
655    uscf->flags = NGX_STREAM_UPSTREAM_CREATE
656                  |NGX_STREAM_UPSTREAM_WEIGHT
657                  |NGX_STREAM_UPSTREAM_MAX_CONNS
658                  |NGX_STREAM_UPSTREAM_MAX_FAILS
659                  |NGX_STREAM_UPSTREAM_FAIL_TIMEOUT
660                  |NGX_STREAM_UPSTREAM_DOWN;
661
662    if (cf->args->nelts == 2) {
663        uscf->peer.init_upstream = ngx_stream_upstream_init_hash;
664
665    } else if (ngx_strcmp(value[2].data, "consistent") == 0) {
666        uscf->peer.init_upstream = ngx_stream_upstream_init_chash;
667
668    } else {
669        ngx_conf_log_error(NGX_LOG_EMERG, cf, 0,
670                           "invalid parameter \"%V\"", &value[2]);
671        return NGX_CONF_ERROR;
672    }
673
674    return NGX_CONF_OK;
675}
676