I tried to set up a PostgreSQL HA cluster with Patroni, but after completing all of the configuration I got errors on my Patroni/PostgreSQL nodes. I checked the error logs and saw that the servers could not connect to my etcd server.
Configuration:
node1: Patroni + PostgreSQL, 192.168.1.15; node2: Patroni + PostgreSQL, 192.168.1.16; node3: etcd, 192.168.1.17; node4: HAProxy, 192.168.1.18
- node1 /etc/patroni.yml;
# Patroni configuration for node1 (192.168.1.15).
# Indentation is significant: the nested keys below must stay under their
# parent sections, otherwise the sections are empty and keys are duplicated.
scope: patroni_deneme
namespace: /db/
# Member name; must be unique for every node in the cluster.
name: node1

restapi:
  listen: 192.168.1.15:8008
  connect_address: 192.168.1.15:8008

# NOTE(review): the log shows Patroni polling /v2/machines on this host;
# confirm the etcd server actually serves the v2 API and is reachable on 2379.
etcd:
  host: 192.168.1.17:2379

bootstrap:
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    postgresql:
      use_pg_rewind: true
  initdb:
    - encoding: UTF8
    - data-checksums
  pg_hba:
    - host replication replicator 127.0.0.1/32 md5
    # /32 limits each rule to that single host; a /0 mask matches any address.
    - host replication replicator 192.168.1.15/32 md5
    - host replication replicator 192.168.1.16/32 md5
    - host all all 0.0.0.0/0 md5
  users:
    admin:
      password: admin
      options:
        - createrole
        - createdb

postgresql:
  listen: 192.168.1.15:5433
  connect_address: 192.168.1.15:5433
  data_dir: /data/patroni
  pgpass: /tmp/pgpass
  authentication:
    replication:
      username: replicator
      password: sifre123
    superuser:
      username: postgres
      password: sifre123
  parameters:
    unix_socket_directories: '.'

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false
  nosync: false
- node2 /etc/patroni.yml;
# Patroni configuration for node2 (192.168.1.16).
# Indentation is significant: the nested keys below must stay under their
# parent sections, otherwise the sections are empty and keys are duplicated.
scope: patroni_deneme
namespace: /db/
# Member name; must be unique for every node in the cluster, so it must
# differ from the name used in the other node's patroni.yml.
name: node2

restapi:
  listen: 192.168.1.16:8008
  connect_address: 192.168.1.16:8008

# NOTE(review): the log shows Patroni polling /v2/machines on this host;
# confirm the etcd server actually serves the v2 API and is reachable on 2379.
etcd:
  host: 192.168.1.17:2379

bootstrap:
  dcs:
    ttl: 30
    loop_wait: 10
    retry_timeout: 10
    maximum_lag_on_failover: 1048576
    postgresql:
      use_pg_rewind: true
  initdb:
    - encoding: UTF8
    - data-checksums
  pg_hba:
    - host replication replicator 127.0.0.1/32 md5
    # /32 limits each rule to that single host; a /0 mask matches any address.
    - host replication replicator 192.168.1.15/32 md5
    - host replication replicator 192.168.1.16/32 md5
    - host all all 0.0.0.0/0 md5
  users:
    admin:
      password: admin
      options:
        - createrole
        - createdb

postgresql:
  listen: 192.168.1.16:5433
  connect_address: 192.168.1.16:5433
  data_dir: /data/patroni
  pgpass: /tmp/pgpass
  authentication:
    replication:
      username: replicator
      password: sifre123
    superuser:
      username: postgres
      password: sifre123
  parameters:
    unix_socket_directories: '.'

tags:
  nofailover: false
  noloadbalance: false
  clonefrom: false
  nosync: false
- node3 /etc/default/etcd;
# etcd environment settings for node3 (192.168.1.17), single-member cluster.
#
# NOTE(review): the Patroni log reports "No route to host" and connect
# timeouts against 192.168.1.17:2379. Check that a firewall on this node
# allows TCP 2379 from 192.168.1.15 and 192.168.1.16 -- TODO confirm.
# NOTE(review): Patroni is requesting /v2/machines; if this etcd release
# ships with the v2 API disabled it would have to be enabled -- TODO confirm.

# Client-facing endpoints (port 2379): where etcd listens and what it advertises.
ETCD_LISTEN_CLIENT_URLS="http://localhost:2379,http://192.168.1.17:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://192.168.1.17:2379"

# Peer endpoints (port 2380).
ETCD_LISTEN_PEER_URLS="http://192.168.1.17:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://192.168.1.17:2380"

# Initial cluster bootstrap values.
ETCD_INITIAL_CLUSTER="default=http://192.168.1.17:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
- node4 /etc/haproxy/haproxy.cfg;
# HAProxy configuration for node4 (192.168.1.18): forwards port 5000 to the
# PostgreSQL backend whose health check on port 8008 answers HTTP 200.
global
    maxconn 100

defaults
    log global
    mode tcp
    retries 2
    timeout client 30m
    timeout connect 4s
    timeout server 30m
    timeout check 5s

# Statistics page served over HTTP on port 7000.
listen stats
    mode http
    bind *:7000
    stats enable
    stats uri /

listen postgres
    bind *:5000
    # Health check is an HTTP request to port 8008 on each server (see
    # "check port 8008" below); only servers answering 200 receive traffic.
    option httpchk
    http-check expect status 200
    default-server inter 3s fall 3 rise 2 on-marked-down shutdown-sessions
    server PSQL01 192.168.1.15:5433 maxconn 100 check port 8008
    # Address corrected from 192.166.1.16 to node2's actual IP.
    server PSQL02 192.168.1.16:5433 maxconn 100 check port 8008
Error log:
Jan 02 12:08:38 master patroni[14341]: 2024-01-02 12:08:38,970 WARNING: Retrying (Retry(total=1, connect=None, read=None, redirect=0, status=None)) after connection broken by 'ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7f88825e6940>, 'Connection to 192.168.1.17 timed out. (connect timeout=1.6666666666666667)')': /v2/machines
Jan 02 12:08:40 master patroni[14341]: 2024-01-02 12:08:40,414 WARNING: Retrying (Retry(total=0, connect=None, read=None, redirect=0, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f88825ab730>: Failed to establish a new connection: [Errno 113] No route to host')': /v2/machines
Jan 02 12:08:42 master patroni[14341]: 2024-01-02 12:08:42,084 ERROR: Failed to get list of machines from http://192.168.1.17:2379/v2: MaxRetryError("HTTPConnectionPool(host='192.168.1.17', port=2379): Max retries exceeded with url: /v2/machines (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7f88825ab700>, 'Connection to 192.168.1.17 timed out. (connect timeout=1.6666666666666667)'))")
Jan 02 12:08:42 master patroni[14341]: 2024-01-02 12:08:42,084 INFO: waiting on etcd
I checked the configuration on the etcd node, and everything looks correct. I also rechecked the etcd node's address in the config files on the Patroni nodes and didn't see anything wrong.