k8s-1.22.3集群etcd备份与恢复

一、环境准备

注:请在测试环境下验证操作
CentOS Linux release 7.7.1908 (Core)  3.10.0-1062.el7.x86_64 

kubeadm-1.22.3-0.x86_64
kubelet-1.22.3-0.x86_64
kubectl-1.22.3-0.x86_64
kubernetes-cni-0.8.7-0.x86_64

主机名 IP VIP
k8s-master01 192.168.10.61 192.168.10.70
k8s-master02 192.168.10.62
k8s-master03 192.168.10.63
k8s-node01 192.168.10.64
k8s-node02 192.168.10.65

二、安装etcdctl工具

1、yum安装

yum install -y etcd

2、或者直接下载静态编译的包

wget https://github.com/etcd-io/etcd/releases/download/v3.4.14/etcd-v3.4.14-linux-amd64.tar.gz
tar -zxf etcd-v3.4.14-linux-amd64.tar.gz
cd etcd-v3.4.14-linux-amd64
cp etcdctl /usr/local/bin

注:etcd最新的API版本是v3,与v2相比,v3更高效更清晰。k8s默认使用etcd的V3版本API,而etcdctl(3.3及更早版本)默认使用V2版本API。要想使用v3,可以执行export ETCDCTL_API=3临时切换为V3;或者在/etc/profile中添加export ETCDCTL_API=3,然后执行source /etc/profile,则永久切换为V3。

echo "export ETCDCTL_API=3" >>/etc/profile
source /etc/profile

三、查看etcd节点和状态

注:根据自己的环境指定证书路径

1、查看etcd节点

etcdctl --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --write-out=table --endpoints=192.168.10.61:2379,192.168.10.63:2379,192.168.10.62:2379 member list
+------------------+---------+--------------+----------------------------+----------------------------+------------+
|        ID        | STATUS  |     NAME     |         PEER ADDRS         |        CLIENT ADDRS        | IS LEARNER |
+------------------+---------+--------------+----------------------------+----------------------------+------------+
| 3c3f0bd3bdd4ab17 | started | k8s-master01 | https://192.168.10.61:2380 | https://192.168.10.61:2379 |      false |
| 8f9d6f521fe8bcf3 | started | k8s-master03 | https://192.168.10.63:2380 | https://192.168.10.63:2379 |      false |
| c23c5081dc6638ca | started | k8s-master02 | https://192.168.10.62:2380 | https://192.168.10.62:2379 |      false |
+------------------+---------+--------------+----------------------------+----------------------------+------------+

2、查看etcd节点状态

etcdctl --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --write-out=table --endpoints=192.168.10.61:2379,192.168.10.63:2379,192.168.10.62:2379 endpoint  status
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
|      ENDPOINT      |        ID        | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| 192.168.10.61:2379 | 3c3f0bd3bdd4ab17 |   3.5.6 |  4.4 MB |     false |      false |         4 |     892897 |             892897 |        |
| 192.168.10.63:2379 | 8f9d6f521fe8bcf3 |   3.5.6 |  4.4 MB |     false |      false |         4 |     892897 |             892897 |        |
| 192.168.10.62:2379 | c23c5081dc6638ca |   3.5.6 |  4.4 MB |      true |      false |         4 |     892897 |             892897 |        |
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+

四、备份etcd数据

1、新建验证数据

注:为了验证恢复数据是否正确,可以在备份之前新建一个namespace

kubectl create ns test-ns

#查看新建的ns

# kubectl get ns
NAME                   STATUS   AGE
default                Active   151d
ingress-nginx          Active   151d
kube-node-lease        Active   151d
kube-public            Active   151d
kube-system            Active   151d
kubernetes-dashboard   Active   151d
test-ns                Active   47s

2、备份

注:备份只需要找其中一个master节点的etcd进行备份就可以。

etcdctl --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --write-out=table --endpoints=192.168.10.61:2379 snapshot save /opt/etcd-snapshot.db

3、备份完后,再删除新建的ns

kubectl delete ns test-ns

五、恢复etcd数据

注:k8s集群中如果有多个etcd组成的集群,那么每个etcd都需要进行独立恢复

#在恢复前需要先把每个master节点上的服务停掉(把manifests目录改名后,kubelet会自动停止etcd、kube-apiserver等静态Pod),以免有新的数据写入,并且要把默认的etcd数据目录改名。以下两条命令在每个master节点上只需执行一次,下文各节点的步骤中会再次列出

mv /etc/kubernetes/manifests/ /etc/kubernetes/manifests.bak
mv /var/lib/etcd /var/lib/etcd.bak

#以下证书、key、name等相关信息可以在etcd.yaml中查到(原路径为/etc/kubernetes/manifests/etcd.yaml;按上面的命令改名后,位于/etc/kubernetes/manifests.bak/etcd.yaml)

    - --advertise-client-urls=https://192.168.10.61:2379
    - --cert-file=/etc/kubernetes/pki/etcd/server.crt
    - --client-cert-auth=true
    - --data-dir=/var/lib/etcd
    - --experimental-initial-corrupt-check=true
    - --initial-advertise-peer-urls=https://192.168.10.61:2380
    - --initial-cluster=k8s-master01=https://192.168.10.61:2380
    - --key-file=/etc/kubernetes/pki/etcd/server.key
    - --listen-client-urls=https://127.0.0.1:2379,https://192.168.10.61:2379
    - --listen-metrics-urls=http://127.0.0.1:2381
    - --listen-peer-urls=https://192.168.10.61:2380
    - --name=k8s-master01
    - --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
    - --peer-client-cert-auth=true
    - --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
    - --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
    - --snapshot-count=10000
    - --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt

1、恢复k8s-master01的etcd数据

#首先停掉服务

mv /etc/kubernetes/manifests/ /etc/kubernetes/manifests.bak
mv /var/lib/etcd /var/lib/etcd.bak

#恢复数据(需要先把上面备份的/opt/etcd-snapshot.db分别上传到另外2台master节点的相同路径下)

etcdctl snapshot restore /opt/etcd-snapshot.db --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --name k8s-master01 --initial-cluster "k8s-master02=https://192.168.10.62:2380,k8s-master01=https://192.168.10.61:2380,k8s-master03=https://192.168.10.63:2380" --initial-advertise-peer-urls https://192.168.10.61:2380 --data-dir=/var/lib/etcd

2、恢复k8s-master02的etcd数据

#首先停掉服务

mv /etc/kubernetes/manifests/ /etc/kubernetes/manifests.bak
mv /var/lib/etcd /var/lib/etcd.bak

#恢复数据

etcdctl snapshot restore /opt/etcd-snapshot.db --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --name k8s-master02 --initial-cluster "k8s-master02=https://192.168.10.62:2380,k8s-master01=https://192.168.10.61:2380,k8s-master03=https://192.168.10.63:2380" --initial-advertise-peer-urls https://192.168.10.62:2380 --data-dir=/var/lib/etcd

3、恢复k8s-master03的etcd数据

#首先停掉服务

mv /etc/kubernetes/manifests/ /etc/kubernetes/manifests.bak
mv /var/lib/etcd /var/lib/etcd.bak

#恢复数据

etcdctl snapshot restore /opt/etcd-snapshot.db --cacert=/etc/kubernetes/pki/etcd/ca.crt --cert=/etc/kubernetes/pki/etcd/peer.crt --key=/etc/kubernetes/pki/etcd/peer.key --name k8s-master03 --initial-cluster "k8s-master02=https://192.168.10.62:2380,k8s-master01=https://192.168.10.61:2380,k8s-master03=https://192.168.10.63:2380" --initial-advertise-peer-urls https://192.168.10.63:2380 --data-dir=/var/lib/etcd

4、恢复服务,3台master都需要操作

注:每个节点执行恢复数据操作后,都会新生成/var/lib/etcd数据目录

mv /etc/kubernetes/manifests.bak /etc/kubernetes/manifests

5、查看数据是否恢复

# kubectl get ns
NAME                   STATUS   AGE
default                Active   151d
ingress-nginx          Active   151d
kube-node-lease        Active   151d
kube-public            Active   151d
kube-system            Active   151d
kubernetes-dashboard   Active   151d
test-ns                Active   47s