[pgpool-general-jp: 1123] Re: オンラインリカバリ後にゾンビプロセスになる

GOTO, Daisuke gotoh @ m-design.com
2012年 11月 21日 (水) 19:56:17 JST


こんにちは。後藤と申します。

同じような現象が再現しましたのでご報告いたします。

--
postgresql9系の評価をするために以下の環境で構築しています。

・windows7のVMware player上の仮想マシン環境
・CentOS release 6.2 (Final)
・postgresql91-9.1.6-1PGDG.rhel6.x86_64
・pgpool-II-91-3.1.3-2.rhel6.x86_64
（postgresql91 と pgpool-II-91 は yum で
 pgdg91 レポジトリからインストールしました）

マシンＡ(DB21)，Ｂ(DB22)の２台構成で、
それぞれpostgresql、pgpoolをインストールしてあります。
（マシンＢのpgpoolは起動していません）

pgpool.conf はレプリケーションモードで運用しています。
つまり
replication_mode = on 
master_slave_mode = off
parallel_mode = off
です。

この構成で同期がとれている状態からpcp_detach_nodeをしたり、
片系を切り離してオンラインリカバリや pcp_attach_node を実施すると、
PCPの子プロセスがゾンビとなり、以降pcpのコマンドの応答がなくなる場合があります。

--
■子プロセスの確認
[root @ DB21 9.1]# ps ax | grep pgpool
24848 ?        Ss     0:00 /usr/pgpool-9.1/bin/pgpool -f /etc/pgpool-II-91/pgpool.conf
25054 ?        S      0:00 pgpool: wait for connection request
25055 ?        S      0:00 pgpool: wait for connection request
...
25084 ?        S      0:00 pgpool: wait for connection request
25085 ?        S      0:00 pgpool: wait for connection request
25086 ?        S      0:00 pgpool: worker process
25088 ?        S      0:00 pgpool: PCP: wait for connection request
25092 pts/1    S+     0:00 grep pgpool

■別のターミナルでdetach/attachを繰り返し
[root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 0
192.168.68.151 5433 1 0.500000
[root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 1
192.168.68.152 5433 1 0.500000
[root @ DB21 ~]# pcp_detach_node 10 192.168.68.151 9898 postgres postgres 0
[root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 0
(応答なし。１０秒以上経っても復帰せず)

--
■その時のプロセス状態

[root @ DB21 9.1]#  ps ax | grep pgpool
24848 ?        Ss     0:00 /usr/pgpool-9.1/bin/pgpool -f /etc/pgpool-II-91/pgpool.conf
25088 ?        Z      0:00 [pgpool] <defunct>
25099 ?        S      0:00 pgpool: wait for connection request
25100 ?        S      0:00 pgpool: wait for connection request
...
25129 ?        S      0:00 pgpool: wait for connection request
25130 ?        S      0:00 pgpool: wait for connection request
25131 ?        S      0:00 pgpool: worker process
25139 pts/1    S+     0:00 grep pgpool


■その時のpgpoolログ(別ターミナル)

[root @ DB21 log]# tail -f /var/log/messages
Nov 21 19:27:35 DB21 pgpool[25088]: degenerate_backend_set: 0 fail over request from pid 25088
Nov 21 19:27:35 DB21 pgpool[24848]: starting degeneration. shutdown host 192.168.68.151(5433)
Nov 21 19:27:35 DB21 pgpool[24848]: Restart all children
Nov 21 19:27:35 DB21 pgpool[24848]: execute command: echo failover 0 192.168.68.151 5433 /var/lib/pgsql/9.1/data 1 192.168.68.152 0 0
Nov 21 19:27:35 DB21 pgpool[24848]: failover: set new primary node: -1
Nov 21 19:27:35 DB21 pgpool[24848]: failover: set new master node: 1
Nov 21 19:27:35 DB21 pgpool[25086]: worker process received restart request
Nov 21 19:27:35 DB21 pgpool[24848]: failover done. shutdown host 192.168.68.151(5433)
Nov 21 19:27:36 DB21 pgpool[25088]: pcp child process received restart request
Nov 21 19:27:36 DB21 pgpool[24848]: worker child 25086 exits with status 256
Nov 21 19:27:36 DB21 pgpool[24848]: fork a new worker child pid 25131


■応答が無くなってから、親プロセスに SIGCHLD を投げるとpcpコマンドが復帰する。
[root @ DB21 9.1]# kill -SIGCHLD 24848
[root @ DB21 9.1]# ps ax | grep pgpool
24848 ?        Ss     0:00 /usr/pgpool-9.1/bin/pgpool -f /etc/pgpool-II-91/pgpool.conf
25099 ?        S      0:00 pgpool: wait for connection request
25100 ?        S      0:00 pgpool: wait for connection request
...
25129 ?        S      0:00 pgpool: wait for connection request
25130 ?        S      0:00 pgpool: wait for connection request
25131 ?        S      0:00 pgpool: worker process
25143 ?        S      0:00 pgpool: PCP: wait for connection request
25147 pts/1    S+     0:00 grep pgpool

■上記のmessagesの続き

Nov 21 19:29:28 DB21 pgpool[24848]: PCP child 25088 exits with status 256
Nov 21 19:29:28 DB21 pgpool[24848]: fork a new PCP child pid 25143


■さっき応答がなくてフリーズしていたコマンドが復帰して結果が出力されている
[root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 0
192.168.68.151 5433 3 0.500000

--

うちの環境では再現率はかなり高い感じでした。
（7〜8割くらい）

waitpid周りが怪しいと思い、以下のようにソースを修正したところ、
この現象は再現しなくなりました。

[root @ jtn-test tr]# diff -Naru main.c.org main.c
--- main.c.org  2012-11-21 18:47:55.000000000 +0900
+++ main.c      2012-11-21 18:48:39.000000000 +0900
@@ -2134,7 +2134,8 @@

                        pcp_pid = pcp_fork_a_child(pcp_unix_fd, pcp_inet_fd, pcp_conf_file);
                        pool_log("fork a new PCP child pid %d", pcp_pid);
-                       break;
+                       //break;
+                       continue;
                }

                /* exiting process was worker process */
@@ -2149,7 +2150,8 @@
                                worker_pid = worker_fork_a_child();

                        pool_log("fork a new worker child pid %d", worker_pid);
-                       break;
+                       //break;
+                       continue;
                } else
                {
                        if (WIFSIGNALED(status))

--

以上です。

--
s-fukudaさん<s-fukuda @ acs21.co.jp>wrote:
> はじめまして。
> 福田と申します。
> 
> 現在、PGPOOL-Ⅱ(3.1.3)＋PostgreSQL(8.4.12-1)を利用
> しております。
> 
> レプリケーションモードで運用し、オンラインリカバリの
> 設定を行い、pcp_recovery_nodeを実行したところ、コマンド
> は正常に終了するのですが、プロセスのリスタートがかかった
> 際にプロセスがゾンビプロセスとなってしまいます。
> 
> ＜実行前＞
> postgres 18045 17008 0 10:13 ? 00:00:00 pgpool: PCP: wait for connection request
> 
> ＜実行後＞
> postgres 19114 17008 0 15:52 ? 00:00:00 [pgpool] <defunct>
> 
> ■実行したコマンド
> 
> pcp_recovery_node 100 localhost ポート ユーザ名 パスワード ノードID
> 
> 
> ■環境
>  RHEL 5.6 (64bit)
>  - pgpool-Ⅱ 3.1.3
> 
>  RHEL 5.6 (64bit)
>  - PostgreSQL 8.4.12-1
> 
> 
> 対処法がわからず困っております。
> 
> どなたか対処法をご存知な方、ご教授願えませんでしょうか。
> 
> 以上、よろしくお願いいたします。
> 


-- 
 後藤 大輔<gotoh @ m-design.com>
 株式会社エム・ディー・シー
 〒212-0012 川崎市幸区中幸町３丁目２
 Tel. 044-555-3185 Fax. 044-555-5700


pgpool-general-jp メーリングリストの案内