[pgpool-hackers: 399] Test down node in master/slave streaming replication mode, and reattach it.
Sergey Logvinov
serge.logvinov at gmail.com
Wed Nov 13 13:59:53 JST 2013
In master/slave streaming replication mode, we currently test all online
nodes in the function check_replication_time_lag.
I have added a new feature: I now test all nodes, including down nodes.
If a down slave node is alive again, I compare its time lag with the master,
and repeat this check a few times (new config parameter sr_check_max_retries).
If the slave node is not far behind, I mark the node as NODE_UP.
If a down master node is online again, I check the time lag on all slave
nodes. If lag=0, I consider this master to be working fine and reattach it.
This will solve the problem described in ticket #17.
See my implementation in the attached diff file.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.sraoss.jp/pipermail/pgpool-hackers/attachments/20131113/26316ce5/attachment.html>
-------------- next part --------------
diff --git a/pool_config.h b/pool_config.h
index a5a48f1..4668a68 100644
--- a/pool_config.h
+++ b/pool_config.h
@@ -124,6 +124,7 @@ typedef struct {
int sr_check_period; /* streming replication check period */
char *sr_check_user; /* PostgreSQL user name streaming replication check */
char *sr_check_password; /* password for sr_check_user */
+ int sr_check_max_retries; /* streming replication max retries to allow down node */
char *failover_command; /* execute command when failover happens */
char *follow_master_command; /* execute command when failover is ended */
char *failback_command; /* execute command when failback happens */
diff --git a/pool_config.l b/pool_config.l
index b560e65..8e221e0 100644
--- a/pool_config.l
+++ b/pool_config.l
@@ -195,6 +195,7 @@ int pool_init_config(void)
pool_config->sr_check_period = 0;
pool_config->sr_check_user = "nobody";
pool_config->sr_check_password = "";
+ pool_config->sr_check_max_retries = 0;
pool_config->failover_command = "";
pool_config->follow_master_command = "";
pool_config->failback_command = "";
@@ -1265,6 +1266,20 @@ int pool_get_config(char *confpath, POOL_CONFIG_CONTEXT context)
pool_config->sr_check_password = str;
}
+ else if (!strcmp(key, "sr_check_max_retries") &&
+ CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
+ {
+ int v = atoi(yytext);
+
+ if (token != POOL_INTEGER || v < 0)
+ {
+ pool_error("pool_config: %s must be equal or higher than 0 numeric value", key);
+ fclose(fd);
+ return(-1);
+ }
+ pool_config->sr_check_max_retries = v;
+ }
+
else if (!strcmp(key, "failover_command") &&
CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
{
diff --git a/pool_worker_child.c b/pool_worker_child.c
index 429a953..58d2871 100644
--- a/pool_worker_child.c
+++ b/pool_worker_child.c
@@ -62,6 +62,9 @@ extern char **myargv;
char remote_ps_data[NI_MAXHOST]; /* used for set_ps_display */
static POOL_CONNECTION_POOL_SLOT *slots[MAX_NUM_BACKENDS];
+int pg_standby[MAX_NUM_BACKENDS]; // -1 not defined, 0 - master, 1 - standby mode.
+int pg_retrycn[MAX_NUM_BACKENDS]; // retry count
+
static volatile sig_atomic_t reload_config_request = 0;
static volatile sig_atomic_t restart_request = 0;
@@ -91,6 +94,9 @@ static void reload_config(void);
*/
void do_worker_child(void)
{
+ int i;
+ BackendInfo *bkinfo;
+
pool_debug("I am %d", getpid());
/* Identify myself via ps */
@@ -114,6 +120,13 @@ void do_worker_child(void)
/* Initialize per process context */
pool_init_process_context();
+
+ for (i=0;i<NUM_BACKENDS;i++)
+ {
+ pg_retrycn[i] = 0;
+ pg_standby[i] = -1;
+ }
+
for (;;)
{
CHECK_REQUEST;
@@ -137,6 +150,57 @@ void do_worker_child(void)
/* Discard persistent connections */
discard_persistent_connection();
+
+ if (pool_config->sr_check_max_retries > 0)
+ {
+ for (i=0;i<NUM_BACKENDS;i++)
+ {
+ if (VALID_BACKEND(i))
+ continue;
+
+ if (pg_standby[i] == 1) // up stanby node
+ {
+ bkinfo = pool_get_node_info(i);
+ if (bkinfo->standby_delay == 0)
+ pg_retrycn[i]++;
+ else
+ pg_retrycn[i] = 0;
+
+ pool_log("Standby node:%d now is online (retry=%d)",i,pg_retrycn[i]);
+ if (pg_retrycn[i] >= pool_config->sr_check_max_retries)
+ {
+ send_failback_request(i);
+ pg_retrycn[i] = 0;
+ break;
+ }
+ }
+ else if (pg_standby[i] == 0) // up master node
+ {
+ int lag = 0;
+ int j;
+ // To allow master node need lag=0 for all standby servers
+ for (j=0;j<NUM_BACKENDS;j++)
+ if (VALID_BACKEND(j) && pg_standby[i] != -1)
+ {
+ bkinfo = pool_get_node_info(j);
+ lag = lag + bkinfo->standby_delay;
+ }
+
+ if (lag == 0)
+ pg_retrycn[i]++;
+ else
+ pg_retrycn[i] = 0;
+
+ pool_log("Master node:%d now is online (retry=%d)",i,pg_retrycn[i]);
+ if (pg_retrycn[i] >= pool_config->sr_check_max_retries)
+ {
+ send_failback_request(i);
+ pg_retrycn[i] = 0;
+ break;
+ }
+ }
+ }
+ }
}
sleep(pool_config->sr_check_period);
}
@@ -154,9 +218,7 @@ static void establish_persistent_connection(void)
for (i=0;i<NUM_BACKENDS;i++)
{
- if (!VALID_BACKEND(i))
- continue;
-
+ pg_standby[i] = -1;
if (slots[i] == NULL)
{
bkinfo = pool_get_node_info(i);
@@ -168,7 +230,10 @@ static void establish_persistent_connection(void)
if (s)
slots[i] = s;
else
+ {
slots[i] = NULL;
+ pg_retrycn[i] = 0;
+ }
}
}
}
@@ -203,20 +268,17 @@ static void check_replication_time_lag(void)
char *query;
BackendInfo *bkinfo;
unsigned long long int lag;
-
- if (NUM_BACKENDS <= 1)
- {
- /* If there's only one node, there's no point to do checking */
- return;
- }
+ int real_primary = -1;
/* Count healthy nodes */
for (i=0;i<NUM_BACKENDS;i++)
{
- if (VALID_BACKEND(i))
+ if (slots[i])
active_nodes++;
}
+ pool_debug("check_replication_time_lag: active_nodes %d",active_nodes);
+
if (active_nodes <= 1)
{
/* If there's only one or less active node, there's no point
@@ -226,25 +288,46 @@ static void check_replication_time_lag(void)
for (i=0;i<NUM_BACKENDS;i++)
{
- if (!VALID_BACKEND(i))
+ pool_debug("check_replication_time_lag: check DB node %d",i);
+ pg_standby[i]=-1;
+
+ if (!(VALID_BACKEND(i) || slots[i]))
continue;
if (!slots[i])
{
pool_debug("check_replication_time_lag: DB node is valid but no persistent connection");
pool_error("check_replication_time_lag: could not connect to DB node %d, check sr_check_user and sr_check_password", i);
-
- return;
+ continue;
}
- if (PRIMARY_NODE_ID == i)
+ query = "SELECT pg_current_xlog_location()";
+ sts = do_query(slots[i]->con, "SELECT pg_is_in_recovery()",
+ &res, PROTO_MAJOR_V3);
+ if (res->numrows <= 0)
{
- query = "SELECT pg_current_xlog_location()";
+ pool_error("check_replication_time_lag: do_query returns no rows");
}
- else
+ if (res->data[0] == NULL)
+ {
+ pool_error("check_replication_time_lag: do_query returns no data");
+ }
+ if (res->nullflags[0] == -1)
+ {
+ pool_error("check_replication_time_lag: do_query returns NULL");
+ }
+ if (res->data[0] && !strcmp(res->data[0], "t"))
{
+ pg_standby[i]=1;
query = "SELECT pg_last_xlog_replay_location()";
+ pool_debug("check_replication_time_lag: standby node id is %d", i);
}
+ else
+ {
+ pg_standby[i]=0;
+ pool_debug("check_replication_time_lag: primary node id is %d", i);
+ }
+ free_select_result(res);
sts = do_query(slots[i]->con, query, &res, PROTO_MAJOR_V3);
if (sts != POOL_CONTINUE)
@@ -287,16 +370,52 @@ static void check_replication_time_lag(void)
for (i=0;i<NUM_BACKENDS;i++)
{
- if (!VALID_BACKEND(i))
+ if (pg_standby[i] == 0)
+ {
+ if (real_primary == -1)
+ {
+ real_primary = i;
+ }
+ else
+ {
+ if (VALID_BACKEND(i))
+ {
+ pg_standby[i] = -1;
+ pg_standby[real_primary] = -1;
+ pool_error("check_replication_time_lag: second master found id is %d, keep old master id %d",i,PRIMARY_NODE_ID);
+ real_primary = PRIMARY_NODE_ID;
+ pg_standby[real_primary] = 0;
+ }
+ else
+ {
+ pg_standby[i] = -1;
+ pool_error("check_replication_time_lag: second master found id is %d",i);
+ }
+
+ }
+ }
+ }
+
+ if (real_primary < 0)
+ {
+ pool_debug("check_replication_time_lag: no primary node found");
+ return;
+ }
+
+ for (i=0;i<NUM_BACKENDS;i++)
+ {
+ if (!slots[i] || pg_standby[i] == -1)
continue;
/* Set standby delay value */
bkinfo = pool_get_node_info(i);
- lag = (lsn[PRIMARY_NODE_ID] > lsn[i]) ? lsn[PRIMARY_NODE_ID] - lsn[i] : 0;
+ lag = (lsn[real_primary] > lsn[i]) ? lsn[real_primary] - lsn[i] : 0;
- if (PRIMARY_NODE_ID == i)
+ if (real_primary == i)
{
bkinfo->standby_delay = 0;
+ if (!VALID_BACKEND(i))
+ pool_log("Master replication node:%d now is online",i);
}
else
{
@@ -308,8 +427,12 @@ static void check_replication_time_lag(void)
!strcmp(pool_config->log_standby_delay, "if_over_threshold") &&
lag > pool_config->delay_threshold))
{
- pool_log("Replication of node:%d is behind %llu bytes from the primary server (node:%d)",
- i, lsn[PRIMARY_NODE_ID] - lsn[i], PRIMARY_NODE_ID);
+ if (PRIMARY_NODE_ID != real_primary)
+ pool_log("Replication of node:%d is behind %llu bytes from the new primary server (node:%d)",
+ i, lag, real_primary);
+ else
+ pool_log("Replication of node:%d is behind %llu bytes from the primary server (node:%d)",
+ i, lag, real_primary);
}
}
}
More information about the pgpool-hackers
mailing list