1/*
2 * clustering.h
3 *
4 * Copyright (C) 2016 Aerospike, Inc.
5 *
6 * Portions may be licensed to Aerospike, Inc. under one or more contributor
7 * license agreements.
8 *
9 * This program is free software: you can redistribute it and/or modify it under
10 * the terms of the GNU Affero General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option) any
12 * later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
16 * FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU Affero General Public License
20 * along with this program. If not, see http://www.gnu.org/licenses/
21 */
22
/*
 * Aerospike cluster formation v5 based on paxos.
 * A complete discussion of the algorithm can be found at
 * https://docs.google.com/document/d/1u-27aeZD9no9wiWgt1_BsTSg_6ewG9VBI2sYA0g01BE/edit#
 */
28#pragma once
29
30#include <stdbool.h>
31#include <stdint.h>
32
33#include "citrusleaf/cf_vector.h"
34
35#include "fault.h"
36
37#include "fabric/hlc.h"
38
39/*
40 * ----------------------------------------------------------------------------
41 * Public data structures.
42 * ----------------------------------------------------------------------------
43 */
/**
 * Key identifying an Aerospike cluster, carried as an unsigned 64 bit integer.
 */
typedef uint64_t as_cluster_key;
48
/**
 * Identifier of the Aerospike clustering protocol, carried as an unsigned
 * 32 bit integer.
 */
typedef uint32_t as_cluster_proto_identifier;
53
54/**
55 * Configuration for the clustering algorithm.
56 */
57typedef struct as_clustering_config_s
58{
59 /**
60 * The smallest allowed cluster size.
61 */
62 uint32_t cluster_size_min;
63
64 /**
65 * Indicates if clique based eviction is enabled.
66 */
67 bool clique_based_eviction_enabled;
68
69 /**
70 * Current protocol identifier.
71 */
72 as_cluster_proto_identifier protocol_identifier;
73
74} as_clustering_config;
75
/**
 * Versions of the clustering protocol. The numeric values are spelled out
 * explicitly; they match the implicit numbering starting from zero.
 */
typedef enum as_clustering_protocol_version {
	AS_CLUSTERING_PROTOCOL_UNDEF = 0,
	AS_CLUSTERING_PROTOCOL_NONE = 1,
	AS_CLUSTERING_PROTOCOL_V1 = 2,
	AS_CLUSTERING_PROTOCOL_V2 = 3,
	AS_CLUSTERING_PROTOCOL_V3 = 4,
	AS_CLUSTERING_PROTOCOL_V4 = 5,
	AS_CLUSTERING_PROTOCOL_V5 = 6
} as_clustering_protocol_version;
89
/**
 * Kinds of events published by the clustering subsystem.
 */
typedef enum as_clustering_event_type_e {
	/** The cluster membership for this node changed. */
	AS_CLUSTERING_CLUSTER_CHANGED = 0,

	/** This node became an orphan node. */
	AS_CLUSTERING_ORPHANED = 1
} as_clustering_event_type;
105
/**
 * Clustering event qualifier.
 */
typedef enum as_clustering_event_qualifier_e {
	/** Default value, used whenever a qualifier is not applicable. */
	AS_CLUSTERING_QUALIFIER_NA = 0,

	/**
	 * Only relevant for the orphaned event: cluster membership was lost
	 * because the principal evicted this node, or the principal is no longer
	 * reachable, or the cluster is invalid.
	 */
	AS_CLUSTERING_MEMBERSHIP_LOST = 1,

	/**
	 * Only relevant for the orphaned event: this node became an orphan node
	 * in order to attempt a merge.
	 */
	AS_CLUSTERING_ATTEMPTING_MERGE = 2
} as_clustering_event_qualifier;
129
130/**
131 * Clustering event.
132 */
133typedef struct as_clustering_event_s
134{
135 /**
136 * The clustering event type.
137 */
138 as_clustering_event_type type;
139
140 /**
141 * The clustering event qualifier.
142 */
143 as_clustering_event_qualifier qualifier;
144
145 /**
146 * The cluster key. Will be non-zero if this is a cluster change event.
147 */
148 as_cluster_key cluster_key;
149
150 /**
151 * The new succession list. It will not be empty if this is a cluster change
152 * event.
153 *
154 * The allocated space will be freed once the event processing is complete.
155 * Listeners should always create a copy of this list, if it needs to be
156 * used later on by the listener.
157 */
158 cf_vector* succession_list;
159} as_clustering_event;
160
161/*
162 * ----------------------------------------------------------------------------
163 * Public API.
164 * ----------------------------------------------------------------------------
165 */
/**
 * Initialize clustering subsystem.
 *
 * Declared with an explicit (void) parameter list so this is a real prototype
 * and the compiler rejects calls that pass arguments.
 */
void
as_clustering_init(void);
171
/**
 * Start clustering subsystem.
 *
 * Declared with an explicit (void) parameter list so this is a real prototype
 * and the compiler rejects calls that pass arguments.
 */
void
as_clustering_start(void);
177
/**
 * Stop clustering subsystem.
 *
 * Declared with an explicit (void) parameter list so this is a real prototype
 * and the compiler rejects calls that pass arguments.
 */
void
as_clustering_stop(void);
183
/**
 * Reform the cluster with the same succession list. This would trigger the
 * generation of new partition info and the cluster would get a new cluster key.
 *
 * Declared with an explicit (void) parameter list so this is a real prototype
 * and the compiler rejects calls that pass arguments.
 *
 * @return 0 if new clustering round started, -1 otherwise.
 */
int
as_clustering_cluster_reform(void);
192
/**
 * Return the quantum interval, i.e., the interval at which cluster change
 * decisions are taken.
 *
 * Declared with an explicit (void) parameter list so this is a real prototype
 * and the compiler rejects calls that pass arguments.
 *
 * @return the quantum interval in milliseconds.
 */
uint64_t
as_clustering_quantum_interval(void);
199
200/**
201 * Log a vector of node-ids at input severity spliting long vectors over
202 * multiple lines. The call might not work if the vector is not protected
203 * against multi-threaded access.
204 *
205 * @param context the logging context.
206 * @param severity the log severity.
207 * @param file_name the source file name for the log line.
208 * @param line the source file line number for the log line.
209 * @param message the message prefix for each log line. Message and node list
210 * will be separated with a space. Can be NULL for no prefix.
211 * @param nodes the vector of nodes.
212 */
213void
214as_clustering_cf_node_vector_event(cf_fault_severity severity,
215 cf_fault_context context, char* file_name, int line, char* message,
216 cf_vector* nodes);
217
218/**
219 * Log an array of node-ids at input severity spliting long vectors over
220 * multiple lines. The call might not work if the array is not protected against
221 * multi-threaded access.
222 *
223 * @param context the logging context.
224 * @param severity the log severity.
225 * @param file_name the source file name for the log line.
226 * @param line the source file line number for the log line.
227 * @param message the message prefix for each log line. Message and node list
228 * will be separated with a space. Can be NULL for no prefix.
229 * @param nodes the array of nodes.
230 * @param node_count the count of nodes in the array.
231 */
232void
233as_clustering_cf_node_array_event(cf_fault_severity severity,
234 cf_fault_context context, char* file_name, int line, char* message,
235 cf_node* nodes, int node_count);
236
/**
 * Convenience wrapper over as_clustering_cf_node_vector_event() that fills in
 * the source file name (__FILENAME__) and line number (__LINE__) of the call
 * site. Logs a vector of node-ids at the input severity, splitting long
 * vectors over multiple lines. The vector should be protected against
 * multi-threaded access; the call might not work otherwise.
 *
 * @param severity the log severity.
 * @param context the logging context.
 * @param message the message prefix for each log line. Message and node list
 * will be separated with a space. Can be NULL for no prefix.
 * @param nodes the vector of nodes.
 */
#define as_clustering_log_cf_node_vector(severity, context, message, nodes) \
	as_clustering_cf_node_vector_event( \
			(severity), (context), __FILENAME__, __LINE__, (message), (nodes))
251
/**
 * Convenience wrapper over as_clustering_cf_node_array_event() that fills in
 * the source file name (__FILENAME__) and line number (__LINE__) of the call
 * site. Logs an array of node-ids at the input severity, splitting long arrays
 * over multiple lines. The array should be protected against multi-threaded
 * access; the call might not work otherwise.
 *
 * The expansion is a single call expression with no trailing semicolon, so
 * the macro can be used like a function call, including as the sole statement
 * of an if branch that is followed by else. The caller supplies the
 * terminating semicolon.
 *
 * @param severity the log severity.
 * @param context the logging context.
 * @param message the message prefix for each log line. Message and node list
 * will be separated with a space. Can be NULL for no prefix.
 * @param nodes the array of nodes.
 * @param node_count the count of nodes in the array.
 */
#define as_clustering_log_cf_node_array(severity, context, message, nodes, \
		node_count) \
	as_clustering_cf_node_array_event(severity, context, __FILENAME__, \
			__LINE__, message, nodes, node_count)
268
269
270/*
271 * ---- Clustering info command functions. ----
272 */
/**
 * Indicates if the cluster has integrity from this node's point of view.
 *
 * Declared with an explicit (void) parameter list so this is a real prototype
 * and the compiler rejects calls that pass arguments.
 *
 * @return false means that this node is either orphaned or is undergoing a
 * cluster change.
 */
bool
as_clustering_has_integrity(void);
279
/**
 * Indicates if self node is orphaned.
 *
 * Declared with an explicit (void) parameter list so this is a real prototype
 * and the compiler rejects calls that pass arguments.
 */
bool
as_clustering_is_orphan(void);
285
/**
 * Write the clustering state to the log.
 *
 * @param verbose NOTE(review): presumably selects a more detailed dump;
 * confirm against the implementation.
 */
void as_clustering_dump(bool verbose);
291
/**
 * Set the minimum cluster size.
 *
 * @param new_cluster_size_min the new minimum cluster size.
 * @return an int status; NOTE(review): its meaning is not visible in this
 * header, confirm against the implementation.
 */
int as_clustering_cluster_size_min_set(uint32_t new_cluster_size_min);
297