diff --git a/doc/src/sgml/watchdog.sgml b/doc/src/sgml/watchdog.sgml index 2e5ba046..d71baa4e 100644 --- a/doc/src/sgml/watchdog.sgml +++ b/doc/src/sgml/watchdog.sgml @@ -580,6 +580,59 @@ + + allow_a_half_consensus (boolean) + + allow_a_half_consensus configuration parameter + + + + + This parameter works in connection with the with even number of + Pgpool-II nodes. Clusters with an odd number of nodes + are not affected. When enabled, the quorum exists if + half or more of the Pgpool-II + nodes are alive. If the parameter is off, more than half of + the nodes need to be alive for the quorum to exist. + + + For example, when this parameter is enabled in a two node watchdog + cluster, one Pgpool-II node needs to + be alive for the quorum to exist. If the parameter is off, two + nodes need to be alive for the quorum to exist. + + + When this parameter is enabled in a four node watchdog cluster, + two Pgpool-II nodes need to be alive + for the quorum to exist. If the parameter is off, three nodes + need to be alive for the quorum to exist. + + + By enabling this parameter, you should be aware that you take the + risk of a split-brain happening. For example, in a four node + cluster consisting of nodes A, B, C and D, it is possible that the + cluster splits into two separate networks (A, B) and (C, D). For + (A, B) and (C, D) the quorum still exists since in both groups + there are two live nodes out of 4. The two groups each choose their + own master watchdog, which is a split-brain. + + + Default is off. + + + allow_a_half_consensus is not available + prior to Pgpool-II + V4.1. The prior versions work + as if the parameter is on. It is only effective when is enabled + + + This parameter can only be set at server start. 
+ + + + diff --git a/src/config/pool_config_variables.c b/src/config/pool_config_variables.c index f291f40e..5cc8e420 100644 --- a/src/config/pool_config_variables.c +++ b/src/config/pool_config_variables.c @@ -308,6 +308,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"allow_a_half_consensus", CFGCXT_INIT, FAILOVER_CONFIG, + "Quorum exists if more than or equal to a half of even number nodes are alive.", + CONFIG_VAR_TYPE_BOOL, false, 0 + }, + &g_pool_config.allow_a_half_consensus, + false, + NULL, NULL, NULL + }, { {"log_connections", CFGCXT_RELOAD, LOGING_CONFIG, "Logs each successful connection.", diff --git a/src/include/pool_config.h b/src/include/pool_config.h index dab3fa3e..178308e4 100644 --- a/src/include/pool_config.h +++ b/src/include/pool_config.h @@ -478,6 +478,10 @@ typedef struct * can send multiple * failover requests to * build consensus */ + bool allow_a_half_consensus; /* Quorum exists if at least a + * half of even number nodes are + * alive */ + WdLifeCheckMethod wd_lifecheck_method; /* method of lifecheck. 
* 'heartbeat' or 'query' */ bool clear_memqcache_on_escalation; /* Clear query cache on shmem diff --git a/src/include/pool_config_variables.h b/src/include/pool_config_variables.h index 7bf4f51b..e3841112 100644 --- a/src/include/pool_config_variables.h +++ b/src/include/pool_config_variables.h @@ -4,7 +4,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2018 PgPool Global Development Group + * Copyright (c) 2003-2019 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby diff --git a/src/sample/pgpool.conf.sample b/src/sample/pgpool.conf.sample index 4d69fe51..7f79ad15 100644 --- a/src/sample/pgpool.conf.sample +++ b/src/sample/pgpool.conf.sample @@ -646,6 +646,9 @@ allow_multiple_failover_requests_from_node = off # for building the consensus on failover # (change requires restart) +allow_a_half_consensus = off + # Quorum exists if more than or equal to a half of even number nodes are alive. + # (change requires restart) # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-logical b/src/sample/pgpool.conf.sample-logical index 6f8cdc7c..a8d3fc35 100644 --- a/src/sample/pgpool.conf.sample-logical +++ b/src/sample/pgpool.conf.sample-logical @@ -626,6 +626,9 @@ enable_multiple_failover_requests_from_node = off # A Pgpool-II node can cast multiple votes # for building the consensus on failover +allow_a_half_consensus = off + # Quorum exists if more than or equal to a half of even number nodes are alive. 
+ # (change requires restart) # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-master-slave b/src/sample/pgpool.conf.sample-master-slave index 49bb9664..2e9cda05 100644 --- a/src/sample/pgpool.conf.sample-master-slave +++ b/src/sample/pgpool.conf.sample-master-slave @@ -642,6 +642,9 @@ allow_multiple_failover_requests_from_node = off # (change requires restart) +allow_a_half_consensus = off + # Quorum exists if more than or equal to a half of even number nodes are alive. + # (change requires restart) # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-replication b/src/sample/pgpool.conf.sample-replication index b16322a7..6c9f8f66 100644 --- a/src/sample/pgpool.conf.sample-replication +++ b/src/sample/pgpool.conf.sample-replication @@ -642,6 +642,9 @@ allow_multiple_failover_requests_from_node = off # (change requires restart) +allow_a_half_consensus = off + # Quorum exists if more than or equal to a half of even number nodes are alive. + # (change requires restart) # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-stream b/src/sample/pgpool.conf.sample-stream index f76d5cc1..96804f0a 100644 --- a/src/sample/pgpool.conf.sample-stream +++ b/src/sample/pgpool.conf.sample-stream @@ -644,6 +644,9 @@ allow_multiple_failover_requests_from_node = off # (change requires restart) +allow_a_half_consensus = off + # Quorum exists if more than or equal to a half of even number nodes are alive. 
+ # (change requires restart) # - Lifecheck Setting - # -- common -- diff --git a/src/watchdog/watchdog.c b/src/watchdog/watchdog.c index 403a641e..a0e21b3c 100644 --- a/src/watchdog/watchdog.c +++ b/src/watchdog/watchdog.c @@ -5,7 +5,7 @@ * pgpool: a language independent connection pool server for PostgreSQL * written by Tatsuo Ishii * - * Copyright (c) 2003-2016 PgPool Global Development Group + * Copyright (c) 2003-2019 PgPool Global Development Group * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby @@ -420,7 +420,8 @@ static void service_internal_command(void); static unsigned int get_next_commandID(void); static WatchdogNode * parse_node_info_message(WDPacketData * pkt, char **authkey); static void update_quorum_status(void); -static int get_mimimum_nodes_required_for_quorum(void); +static int get_mimimum_remote_nodes_required_for_quorum(void); +static int get_minimum_votes_to_resolve_consensus(void); static bool write_packet_to_socket(int sock, WDPacketData * pkt, bool ipcPacket); static int read_sockets(fd_set *rmask, int pending_fds_count); @@ -2384,7 +2385,7 @@ static WDFailoverCMDResults compute_failover_consensus(POOL_REQUEST_KIND reqKind bool duplicate = false; WDFailoverObject *failoverObj = add_failover(reqKind, node_id_list, node_count, wdNode, *flags, &duplicate); - if (failoverObj->request_count <= get_mimimum_nodes_required_for_quorum()) + if (failoverObj->request_count < get_minimum_votes_to_resolve_consensus()) { ereport(LOG, ( errmsg("failover requires the majority vote, waiting for consensus"), @@ -6333,14 +6334,19 @@ update_quorum_status(void) { int quorum_status = g_cluster.quorum_status; - if (g_cluster.clusterMasterInfo.standby_nodes_count > get_mimimum_nodes_required_for_quorum()) + if (g_cluster.clusterMasterInfo.standby_nodes_count > get_mimimum_remote_nodes_required_for_quorum()) { g_cluster.quorum_status = 1; } - else if 
(g_cluster.clusterMasterInfo.standby_nodes_count == get_mimimum_nodes_required_for_quorum()) + else if (g_cluster.clusterMasterInfo.standby_nodes_count == get_mimimum_remote_nodes_required_for_quorum()) { if (g_cluster.remoteNodeCount % 2 != 0) - g_cluster.quorum_status = 0; /* on the edge */ + { + if (pool_config->allow_a_half_consensus) + g_cluster.quorum_status = 0; /* on the edge */ + else + g_cluster.quorum_status = -1; + } else g_cluster.quorum_status = 1; } @@ -6355,24 +6361,76 @@ update_quorum_status(void) } } -/* returns the minimum number of remote nodes required for quorum */ +/* + * returns the minimum number of remote nodes required for quorum + */ static int -get_mimimum_nodes_required_for_quorum(void) +get_mimimum_remote_nodes_required_for_quorum(void) { /* * Even numner of remote nodes, That means total number of nodes are odd, - * so minimum quorum is just remote/2 + * so minimum quorum is just remote/2. */ if (g_cluster.remoteNodeCount % 2 == 0) - return (g_cluster.remoteNodeCount / 2); + return (g_cluster.remoteNodeCount / 2); /* - * Total nodes including self are even, So we consider 50% nodes as - * quorum, should we? + * Total nodes including self are even, So we return 50% nodes as quorum + * requirements */ return ((g_cluster.remoteNodeCount - 1) / 2); } +/* + * returns the minimum number of votes required for consensus + */ +static int +get_minimum_votes_to_resolve_consensus(void) +{ + /* + * Since get_mimimum_remote_nodes_required_for_quorum() returns + * the number of remote nodes required to complete the quorum + * that is always one less than the total number of nodes required + * for the cluster to build quorum or consensus, reason being + * in get_mimimum_remote_nodes_required_for_quorum() + * we always consider the local node as a valid pre-casted vote. 
+ * But when it comes to count the number of votes required to build + * consensus for any type of decision, for example for building the + * consensus on backend failover, the local node can vote on either + * side. So its vote is not explicitly counted and for the consensus + * we actually need one more vote than the total number of remote nodes + * required for the quorum + * + * For example + * If Total nodes in cluster = 4 + * remote node will be = 3 + * get_mimimum_remote_nodes_required_for_quorum() return = 1 + * Minimum number of votes required for consensus will be + * + * if(pool_config->allow_a_half_consensus == true) + * (exact 50% n/2) ==> 4/2 = 2 + * + * if(pool_config->allow_a_half_consensus == false) + * (exact 50% +1 ==> (n/2)+1) ==> (4/2)+1 = 3 + * + */ + + int required_node_count = get_mimimum_remote_nodes_required_for_quorum() + 1; + /* + * When the total number of nodes in the watchdog cluster including the + * local node are even, the number of votes required for the consensus + * depends on the allow_a_half_consensus. + * So for even number of nodes when allow_a_half_consensus is not enabled + * then we would need one more vote than exact 50% + */ + if (g_cluster.remoteNodeCount % 2 != 0) + { + if (pool_config->allow_a_half_consensus == false) + required_node_count += 1; + } + + return required_node_count; +} /* * sets the state of local watchdog node, and fires a state change event