· 6 years ago · Jan 09, 2020, 10:02 PM
1#ifndef _COMMON_NET_NG_NET_H
2#define _COMMON_NET_NG_NET_H
3
4/*
5 * An RPC API with connection sharing across multiple threads.
6 *
7 * Currently only supports HTTP/1.1 over TCP or TLS, but we are planning
8 * on adding support for other application and transport protocols, such
9 * as HTTP/2, HTTP/3 over QUIC, etc...
10 *
11 * Only the least connection used load-balancing algorithm is implemented
12 * for now, but support for other methodologies such as round-robin will
13 * be added in the future. All load-balancing algorithms support weights
14 * and are implemented using haproxy's elastic binary trees [1].
15 *
16 * Connection sharing is implemented using a semaphore eventfd. Once a
17 * worker thread has no use for a connection anymore, it will push it on
18 * the per-node free-list and write 1 to the eventfd semaphore. This will,
19 * in turn, wake up a thread waiting for a connection on that eventfd if
20 * any. Connections kept on the free-list are still being monitored by the
21 * worker thread that initially put it there. They can, however, be stolen
22 * at any time by another worker. When this happens, a cross-thread
23 * notification is sent from the stealing worker to the monitoring worker.
24 *
25 * Notifications are implemented using MPSC FIFOs coupled with eventfd for
26 * wake-ups. Each notification is acknowledged using a secondary eventfd
27 * to trigger a callback on the sending worker, informing it that the
28 * notification has been fully processed by its recipient(s).
29 *
30 * Notifications are also used by the master thread to synchronize the
31 * nodes in the cluster. In this case, notifications are broadcast from
32 * the master thread to all the other workers.
33 *
34 * For node insertion, a NODE_READY notification is sent to all the
35 * workers, so that they can drain any pending requests (which can
36 * happen if the cluster was empty). If health checks are enabled,
37 * the master thread handles sending them and will send the NODE_READY
38 * notification only once the node passes them.
39 *
40 * For node removal, we use a 2-phased procedure to support removals with
41 * a timeout. First, a NODE_QUIESCE notification is sent to instruct workers
42 * that we are trying to remove a node. If and when they have completed all
43 * pending requests on that node, workers will acknowledge the notification.
44 *
45 * If all workers have acknowledged the NODE_QUIESCE notification, or if the
46 * timeout expired, the master then broadcasts a NODE_SHUTDOWN notification
47 * for that node, instructing workers to abort any pending requests and
48 * connections, and close and destroy any idle connection that they own.
49 *
50 * Cluster shutdown is just implemented as a removal of all nodes, and so
51 * it also supports a timeout.
52 *
53 *
54 * Typical API usage:
55 *
56 * // The control thread
57 * cl = cl_create(...); // cluster creation
58 *
59 * // configuration phase
60 * cl_set_max_conns(cl, ...);
61 * cl_set_conn_timeout(cl, ...);
62 * cl_set_conn_max_idle(cl, ...);
63 * cl_set_conn_retry_backoff(cl, ...);
64 * cl_set_health_check(cl, ...);
65 * [...]
66 *
67 * // start the cluster
68 * cl_start(cl);
69 *
70 * // Worker threads
71 * static void
72 * req_handler(enum cl_req_status status,
73 * const struct cl_response *resp, void *arg)
74 * {
75 * if (status != CL_REQ_COMPLETED) {
76 * // error handling
77 * return;
78 * }
79 * if (resp->code != HTTP_OK) {
80 * // error handling
81 * return;
82 * }
83 *
84 * // ...
85 * cl_response_destroy(resp);
86 * }
87 *
88 * timeout = (struct timeval){ .tv_sec = 5 };
89 * req = cl_request_create(cl, HTTP_GET, "/foo", &timeout, NULL, req_handler, NULL);
90 * headers = cl_request_headers(req);
91 * an_kv_add(headers, "Some-Header", "somevalue");
92 * error = cl_request(cl, NULL, req); // non-targeted request
93 * if (error) {
94 * cl_request_destroy(req);
95 * }
96 *
97 * Additionally, nodes are inserted asynchronously into the cluster using
98 * cl_insert() from the master thread (the thread which created the cluster).
99 * In most cases, we expect people will use the an_adns or an_discovery
100 * adapters to take care of populating the cluster automatically given
101 * a DNS name or an an_discovery application name, respectively. However,
102 * if you wish to send targeted requests (requests to a specific node in
103 * the cluster), you will need to remember the handle returned by
104 * cl_insert() and pass it as the second parameter to cl_request().
105 *
106 * Nodes can be removed from the cluster similarly from the master thread
107 * using cl_remove() and the handle previously returned by cl_insert().
108 *
109 * [1] http://wtarreau.blogspot.com/2011/12/elastic-binary-trees-ebtree.html
110 */
111#include <pthread.h>
112#include <stdbool.h>
113
114/* For enum http_method */
115#include "common/net/protocol/http-parser/http_parser.h"
116/* For enum cl_transport_id (transport protocol identifiers) */
117#include "common/net-ng/transport.h"
118
119struct an_instance;
120struct an_kv_list;
121struct an_rbuf;
122struct event_base;
123struct evbuffer;
124struct threadset;
125struct timeval;
126struct tcp_info;
127
/* Cluster creation flags. Each flag occupies its own bit. */

/* Prefer IPv4 over IPv6. */
#define CL_PREFERV4		(1U << 0)

/* Disable send-side buffering. */
#define CL_NODELAY		(1U << 1)

/* Use TCP Fast Open. */
#define CL_FASTOPEN		(1U << 2)

/*
 * Force sending the "Connection: keep-alive" header, despite it being
 * the default in HTTP/1.1. Useful when working with some broken HTTP
 * implementations.
 */
#define CL_FORCEKEEPALIVE	(1U << 3)

/*
 * NOTE(review): not documented in this header; presumably the opposite
 * of CL_FORCEKEEPALIVE (no connection keep-alive) -- confirm.
 */
#define CL_NOKEEPALIVE		(1U << 4)

/*
 * Soft-remove: do not attempt to remove hosts that are in the purgatory
 * right away, but wait for them to come up and then remove them.
 * Mostly used by the test tool.
 */
#define CL_SOFTREMOVE		(1U << 5)

/*
 * The cluster instance assumes ownership of the body buffers passed to
 * cl_request_create() and destroys them when it no longer has a use for
 * them. If this is set, it is the caller's responsibility to make sure
 * those body buffers aren't accessed after the request callback returns.
 */
#define CL_OWN_BODY		(1U << 6)

/*
 * Disable certificate checks. Only applies to the TLS transport.
 * Do not use this unless you are sure you need it, as it could lead to
 * security vulnerabilities.
 */
#define CL_NO_CERT_CHECKS	(1U << 7)

/*
 * NOTE(review): presumably the upper bound for node weights (see the
 * weight argument of the insertion functions) -- confirm.
 */
#define CL_WEIGHT_MAX		256
161
/*
 * Load-balancing schemes. All of them support weights.
 */
enum cl_lb_scheme {
	/* Least connection used (default). */
	CL_LB_LEASTCONN,
	/* Round-robin selection. */
	CL_LB_RR,
	/* Sentinel: one past the last scheme. */
	CL_LB_MAX
};
168
/* Application request protocols. */
enum cl_req_proto {
	/* HTTP/1.1 */
	CL_PROTO_HTTP1,
	/* Sentinel: one past the last protocol. */
	CL_PROTO_MAX
};
173
/*
 * Exponential backoff algorithms.
 */
enum cl_backoff_type {
	/* Normal exponential backoff, no jitter. */
	CL_BACKOFF_EXP,
	/* Exponential backoff with (full) jitter. */
	CL_BACKOFF_JITTER
};
179
/* Outcome of a request, as reported to the response callback. */
enum cl_req_status {
	/* The request completed normally. */
	CL_REQ_COMPLETED,
	/* The request failed. */
	CL_REQ_FAILED,
	/* Deadline was reached. */
	CL_REQ_TIMEDOUT
};
185
/*
 * Cluster-wide statistics.
 */
enum cl_stat {
	/*
	 * Connection related counters.
	 */

	/* Connections we initiated. */
	CL_STAT_CONNS_INITIATED,
	/* Connections which timed out. */
	CL_STAT_CONNS_TIMEOUT,
	/* Connections refused by peer. */
	CL_STAT_CONNS_FAILED,
	/* Connections which got reset. */
	CL_STAT_CONNS_RESET,
	/* Connections which got "Connection: close". */
	CL_STAT_CONNS_CLOSED,
	/*
	 * Connections discarded because of lifetime or # of requests
	 * restrictions.
	 */
	CL_STAT_CONNS_DISCARDED,
	/* # of currently active connections. */
	CL_STAT_CONNS_ACTIVE,
	/* # of established connections. */
	CL_STAT_CONNS_ESTABLISHED,
	/*
	 * Connection system errors. For TCP, this can happen if we run
	 * out of file descriptors momentarily or if we have exhausted
	 * all the ports in the ephemeral range. For TLS, it's likely a
	 * library error.
	 */
	CL_STAT_CONNS_SYSERRORS,

	/*
	 * Request related counters.
	 */

	/*
	 * Requests seen. This should equal the sum of all other request
	 * counters below.
	 */
	CL_STAT_REQS_SEEN,
	/* Requests completed normally. */
	CL_STAT_REQS_COMPLETED,
	/* Requests which got a parse error. */
	CL_STAT_REQS_PARSE_ERROR,
	/* Requests which got a read error. */
	CL_STAT_REQS_READ_ERROR,
	/* Requests which got a write error. */
	CL_STAT_REQS_WRITE_ERROR,
	/* Requests which got a transport error. */
	CL_STAT_REQS_TRANSPORT_ERROR,
	/* Requests which timed out. */
	CL_STAT_REQS_TIMEDOUT,
	/* Requests retried (also counted in total). */
	CL_STAT_REQS_RETRIED,
	/* Aborted requests because of shutdown. */
	CL_STAT_REQS_ABORTED,
	/* Failed requests (transport level). */
	CL_STAT_REQS_FAILED,

	/*
	 * Average and maximum time (in microseconds) that requests spend
	 * waiting before we start sending them on a connection.
	 */
	CL_STAT_REQS_WAIT_AVG,
	CL_STAT_REQS_WAIT_MAX,

	/* Sentinel: one past the last statistic. */
	CL_STAT_MAX
};
226
227/* A handle identifying a node. */
228struct cl_node;
229
230struct cl_request;
231
/*
 * The response object handed to the response callback.
 *
 * Validity of the fields:
 *  - request is guaranteed to be valid at all times;
 *  - node can be NULL if a request times out before it got a chance
 *    to talk to a node;
 *  - code, status, headers, and body are only valid if the request
 *    status is CL_REQ_COMPLETED.
 */
struct cl_response {
	/* The origin request. */
	struct cl_request *request;
	/* The node that we last contacted. */
	struct cl_node *node;
	/* The status code of the response. */
	unsigned int code;
	/* The reason phrase of the response. */
	char *status;
	/* The headers of the response. */
	struct an_kv_list *headers;
	/* The body of the response. */
	struct an_rbuf *body;
};
251
/*
 * A response callback.
 *
 * It receives the request status, the response object, and an opaque
 * pointer argument.
 *
 * The response structure lives on the stack: the response pointer is
 * only valid for the duration of the callback and must not be
 * referenced after the callback has returned.
 *
 * The body buffer, if non-NULL, is owned by the callback, which is thus
 * responsible for freeing it using an_buf_destroy(), or, more simply,
 * by calling cl_response_destroy(). This is for performance reasons:
 * if the API did this for us and we needed the response body to outlive
 * the callback, we would be forced to allocate more memory and copy the
 * data around.
 */
typedef void (*cl_response_cb_t)(enum cl_req_status,
    const struct cl_response *, void *);
270
/*
 * Collect information about the TCP connection on which this response
 * was received. See tcp(7) for more information.
 */
int cl_response_get_tcp_info(const struct cl_response *,
    struct tcp_info *);

/*
 * Report the time at which cl_request() was called, through the
 * timeval argument.
 */
void cl_response_get_start(const struct cl_response *, struct timeval *);

/* Return the cluster handle this request was sent to. */
struct cluster *cl_response_get_cluster(const struct cl_response *);

/*
 * Release memory associated with a response object.
 *
 * At the time of this writing, this only releases the body buffer, but
 * this might change in the future. Only call this function if you do
 * NOT need to access the body buffer after the user callback has run.
 */
void cl_response_destroy(const struct cl_response *);
291
292/*
293 * Logical object we make requests to.
294 *
295 * A cluster is a collection of nodes along with a load-balancing
296 * scheme, a transport protocol (TCP or TLS for now, SCTP comes to mind)
297 * and an application request protocol (HTTP for now, HTTP/2 at some point).
298 */
299struct cluster;
300
/*
 * Library initialization.
 *
 * The returned threadset object acts as a handle on the API instance.
 * Any thread intending to use this API must subsequently pass it to
 * cl_thread_register(), except for the thread calling this function
 * (the master).
 *
 * NOTE(review): the meaning of the unsigned int argument is not
 * documented in this header.
 */
struct threadset *cl_lib_init(struct event_base *, unsigned int);

/* Release our API handle. */
void cl_lib_fini(struct threadset *);

/* Worker thread initialization. */
void cl_thread_register(struct threadset *, struct event_base *);

/*
 * Cluster creation and configuration.
 *
 * NOTE(review): the int argument presumably carries the CL_* creation
 * flags (see cl_get_flags()) -- confirm.
 */
struct cluster *cl_create(const char *, struct threadset *, int);
319
/*
 * Configuration functions - to be called before cl_start().
 */

/* Transfer ownership of the cluster to the given thread. */
int cl_set_master(struct cluster *, pthread_t);

/* Set the transport protocol. Defaults to TCP. */
int cl_set_transport(struct cluster *, enum cl_transport_id);

/* Set the maximum # of connections per node (0 for infinite). */
int cl_set_max_conns(struct cluster *, uint32_t);

/* Set the maximum # of active connections per node (0 for infinite). */
int cl_set_max_active_conns(struct cluster *, uint32_t);

/*
 * Set the maximum # of concurrent connections per node.
 * Defaults to 0 for infinite.
 *
 * NOTE(review): the function name says "pending" while this description
 * says "concurrent" -- confirm which one is meant.
 */
int cl_set_max_pending_conns(struct cluster *, uint32_t);

/* Set the maximum # of connections total (0 for infinite). */
int cl_set_max_total_conns(struct cluster *, uint32_t);

/* Set the maximum # of retries, or -1 for infinite. Defaults to 0. */
int cl_set_max_retries(struct cluster *, int);

/* Set the load-balancing scheme. Defaults to least-conn. */
int cl_set_lb(struct cluster *, enum cl_lb_scheme);

/* Set the protocol. Defaults to HTTP/1.1. */
int cl_set_protocol(struct cluster *, enum cl_req_proto);

/*
 * Set the maximum # of requests allowed per connection. If this number
 * is reached, the connection will be forcibly closed and a new one will
 * be established.
 *
 * Defaults to 0 for an unlimited number of requests.
 */
int cl_set_max_requests(struct cluster *, size_t);

/* Set the maximum lifetime of a connection. */
int cl_set_conn_lifetime(struct cluster *, const struct timeval *);

/* Set the maximum idle time of a connection. */
int cl_set_conn_max_idle(struct cluster *, const struct timeval *);

/*
 * Set the connection timeout, or {0,0} for unlimited.
 * Defaults to 30 seconds.
 */
int cl_set_conn_timeout(struct cluster *, const struct timeval *);

/* Set the maximum request deadline. */
int cl_set_max_deadline(struct cluster *, const struct timeval *);

/*
 * Set the base and maximum values (in ms) used by the binary
 * exponential backoff algorithm for connection retries.
 * Defaults to {5, 300000}, using the no jitter algorithm.
 */
int cl_set_conn_retry_backoff(struct cluster *, enum cl_backoff_type,
    unsigned int, unsigned int);

/*
 * Set the user data pointer. Like all other configuration functions,
 * it can only be called before the cluster has been started.
 */
int cl_set_data(struct cluster *, void *);

/* Get the user data pointer. Can be called at any time. */
void *cl_get_data(const struct cluster *);
374
/*
 * A health check callback.
 *
 * Similar to the response callback, except that it is only invoked when
 * the request completed, and that it returns a boolean indicating
 * whether the host passed the check or not.
 */
typedef bool (*cl_health_check_cb)(const struct cl_response *, void *);

/*
 * Set the health check.
 *
 * The callback parameter is allowed to be NULL, in which case the
 * default logic is to consider a health check successful if the HTTP
 * status code of the response is 200.
 */
int cl_set_health_check(struct cluster *, enum http_method, const char *,
    const struct timeval *, cl_health_check_cb, void *);

/*
 * Set the base and maximum values (in ms) used by the binary
 * exponential backoff algorithm for health check retries.
 * Defaults to {5, 300000}, using the no jitter algorithm.
 */
int cl_set_health_check_backoff(struct cluster *, enum cl_backoff_type,
    unsigned int, unsigned int);
396
/*
 * TLS-specific configuration functions.
 */

/* Load the private key at the given path (PEM format). */
int cl_set_tls_pkey(struct cluster *, const char *);

/* Load the certificate chain at the given path (PEM format). */
int cl_set_tls_certificate(struct cluster *, const char *);

/* Set the list of available ciphers (TLS < 1.3). */
int cl_set_tls_cipher_list(struct cluster *, const char *);

/* Set the list of available ciphers (TLS 1.3). */
int cl_set_tls_ciphersuites(struct cluster *, const char *);
410
/*
 * Obtain the current value of the given metric.
 *
 * NOTE(review): the meaning of the bool argument is not documented in
 * this header.
 */
uint64_t cl_stat_get(struct cluster *, enum cl_stat, bool);

/* Return a string representing the name of the given metric. */
const char *cl_stat_get_name(const struct cluster *, enum cl_stat);

/* Get the cluster creation flags. */
int cl_get_flags(const struct cluster *);

/* Return the total number of nodes in this cluster. */
size_t cl_get_num_nodes(const struct cluster *);

/* Return the number of usable nodes in this cluster. */
size_t cl_get_ready_nodes(const struct cluster *);

/* Return the number of established connections in this cluster. */
size_t cl_get_num_conns(struct cluster *);

/* Return the number of active connections (being used) in this cluster. */
size_t cl_get_num_active_conns(struct cluster *);

/* Start the cluster. */
int cl_start(struct cluster *);

/* Dump the cluster in JSON. */
void cl_dump(struct cluster *, struct evbuffer *);
433
/* A shutdown callback. */
typedef void (*cl_shutdown_cb_t)(struct cluster *, void *);

/*
 * Shut down the cluster, waiting for currently running requests to
 * complete.
 *
 * If the timeval parameter is not NULL and the cluster still hasn't
 * finished processing requests when that duration has elapsed, the
 * requests are forcibly aborted.
 *
 * If the shutdown callback is not NULL, it is called right before the
 * cluster is destroyed, so that it is possible to do something at that
 * time, such as gathering the final values of the statistics.
 *
 * It is the responsibility of the caller to ensure that *no* cluster or
 * node handles remain visible to any of the worker threads before
 * calling this function. Failure to respect this requirement will
 * result in undefined behavior.
 */
void cl_shutdown(struct cluster *, const struct timeval *,
    cl_shutdown_cb_t, void *);
455
/*
 * Insert a node into the given cluster and return its handle.
 *
 * The caller does not necessarily need to care about the returned
 * handle. It is only useful to send requests to that specific node, or
 * to be able to remove that node from the cluster (all nodes are
 * removed on cluster shutdown).
 *
 * However, be aware that if you keep node references around, you will
 * have to guarantee that they cannot be accessed by other threads
 * before you can safely call cl_remove() on them (technically, you only
 * need to guarantee that no other thread is going to call cl_request()
 * on that node handle).
 */
struct cl_node *cl_insert(struct cluster *, const char *,
    const struct sockaddr *, socklen_t, unsigned int, void *);

/*
 * Convenience wrapper for the common case where we have a string
 * representation of an IP address and an integer port.
 */
struct cl_node *cl_insert_ip_address(struct cluster *, const char *,
    in_port_t, unsigned int weight, void *);

/* Callback type used by the getaddrinfo() based insertion below. */
typedef void (*cl_dns_cb_t)(struct cl_node *, void *);

/* Simple getaddrinfo() based insertion. */
int cl_insert_dns(struct cluster *, const char *, const char *,
    unsigned int, cl_dns_cb_t, void *);

/* Get the address of a node. */
const struct sockaddr *cl_node_get_addr(const struct cl_node *,
    socklen_t *);

/* Get the user data pointer passed to cl_insert(). */
void *cl_node_get_data(const struct cl_node *);
492
/*
 * Remove a node from the cluster.
 *
 * As with cl_shutdown(), the caller is responsible for ensuring that
 * this node handle is inaccessible from any of the workers prior to
 * calling this function. Failure to respect this requirement will
 * result in undefined behavior.
 */
void cl_remove(struct cl_node *, const struct timeval *);
502
503/* The request API. */
504
505/* Create a request object. */
506struct cl_request *cl_request_create(struct cluster *, enum http_method,
507 const char *, const struct timeval *, struct an_rbuf *, cl_response_cb_t,
508 void *);
509
510/* Access the an_kv object for this request's headers. */
511struct an_kv_list *cl_request_headers(struct cl_request *);
512
513/* Destroy a request. */
514void cl_request_destroy(struct cl_request *);
515
516/*
517 * Send a request to the cluster.
518 *
519 * If we pass a NULL node, one will be selected according to the
520 * configured load-balacing scheme. Otherwise, the request is sent on
521 * the provided node.
522 *
523 * Returns 0 when the request is either in progress, or if it has already
524 * completed. In those cases, the callback will be invoked (and it might
525 * even have already been invoked if the request completed immediately),
526 * and the API owns the request object and will take care of releasing it.
527 *
528 * Otherwise, returns -1 and the callback will never be called. In this
529 * case, it is the responsibility of the caller to call cl_request_destroy()
530 * on the request object that we failed to send.
531 */
532int cl_request(struct cluster *, struct cl_node *, struct cl_request *);
533
534#endif /* _COMMON_NET_NG_NET_H */