Ansible-playbook 自动化离线部署 Kubernetes 集群
一、基础环境配置
📎 附件下载:Centos7-ansible-k8s-containerd-202501031.tar.gz
系统版本
[root@k8s-master1 ~]# cat /etc/os-release
NAME="CentOS Linux"
VERSION="7 (Core)"
ID="centos"
ID_LIKE="rhel fedora"
VERSION_ID="7"
PRETTY_NAME="CentOS Linux 7 (Core)"
ANSI_COLOR="0;31"
CPE_NAME="cpe:/o:centos:centos:7"
HOME_URL="https://www.centos.org/"
BUG_REPORT_URL="https://bugs.centos.org/"
CENTOS_MANTISBT_PROJECT="CentOS-7"
CENTOS_MANTISBT_PROJECT_VERSION="7"
REDHAT_SUPPORT_PRODUCT="centos"
REDHAT_SUPPORT_PRODUCT_VERSION="7"
📎 附件下载:Ubuntu-22.04.2-ansible-k8s-containerd-202501031.tar.gz
系统版本
ubuntu@ubuntu:~$ cat /etc/os-release
PRETTY_NAME="Ubuntu 22.04.2 LTS"
NAME="Ubuntu"
VERSION_ID="22.04"
VERSION="22.04.2 LTS (Jammy Jellyfish)"
VERSION_CODENAME=jammy
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
UBUNTU_CODENAME=jammy
1. 安装 ansible 和 sshpass
# centos7 安装
[root@k8s-master1 Centos7-ansible-k8s-containerd-20250413]# yum -y localinstall roles/init/files/ansible/*.rpm
# ubuntu 安装
root@ubuntu:~# sudo apt remove --purge ansible
root@ubuntu:~# sudo apt autoremove
root@ubuntu:~# sudo apt --fix-broken install
root@ubuntu:~# sudo dpkg --configure -a
root@ubuntu:~# dpkg -i --force-all roles/init/files/ansible-2204/*.deb
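# 安装完成后可简单验证 ansible 与 sshpass 是否可用(示意,版本号以实际离线包为准)
ansible --version
sshpass -V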
2. 创建普通用户和配置免密(不一定非得要免密 sudo)
脚本工具
#!/bin/bash
# 默认配置
DEFAULT_USER="root"
DEFAULT_PASSWORD=""
DEFAULT_NEW_USER="k8s-deploy"
DEFAULT_NEW_PASSWORD=""
DEFAULT_HOSTS=("11.0.1.31" "11.0.1.32" "11.0.1.33")
# 显示帮助信息
show_help() {
cat << EOF
用法: $0 [选项]
选项:
-u, --user USERNAME SSH登录用户名 (默认: $DEFAULT_USER)
-p, --password PASSWORD SSH登录密码 (必须指定)
-n, --new-user USERNAME 要创建的新用户名 (默认: $DEFAULT_NEW_USER)
-w, --new-password PASSWORD 新用户的密码 (必须指定)
-h, --host HOSTS 目标主机IP,多个用逗号分隔 (默认: ${DEFAULT_HOSTS[*]})
--help 显示此帮助信息
示例:
$0 -p "root_password" -w "new_user_password"
$0 -u root -p "123456" -n myuser -w "mypassword" -h "11.0.1.31,11.0.1.32"
EOF
}
# 检查必需工具
check_dependencies() {
if ! command -v sshpass &> /dev/null; then
echo "错误: 未找到 sshpass,请先安装: yum install sshpass 或 apt-get install sshpass"
exit 1
fi
}
# 执行远程命令
execute_remote() {
local host="$1"
local command="$2"
if [ -z "$PASSWORD" ]; then
ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 "$USER@$host" "$command"
else
sshpass -p "$PASSWORD" ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 "$USER@$host" "$command"
fi
}
# 修改secure_path配置
modify_secure_path() {
local host="$1"
echo "修改secure_path配置..."
# 备份原始sudoers文件
execute_remote "$host" "
if [ -f /etc/sudoers ]; then
cp /etc/sudoers /etc/sudoers.backup.$(date +%Y%m%d%H%M%S)
fi
"
# 修改secure_path
execute_remote "$host" "
# 检查是否已经存在secure_path配置
if grep -q '^Defaults.*secure_path' /etc/sudoers; then
# 替换现有的secure_path
sed -i 's|^Defaults.*secure_path.*|Defaults secure_path = /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin|' /etc/sudoers
else
# 添加新的secure_path配置
echo 'Defaults secure_path = /sbin:/bin:/usr/sbin:/usr/bin:/usr/local/bin:/usr/local/sbin' >> /etc/sudoers
fi
# 验证sudoers文件语法
if ! visudo -c &> /dev/null; then
echo '错误: sudoers文件语法验证失败,恢复备份'
if [ -f /etc/sudoers.backup.* ]; then
cp /etc/sudoers.backup.* /etc/sudoers
fi
exit 1
fi
" || {
echo "错误: 在主机 $host 上修改secure_path失败"
return 1
}
echo "secure_path配置修改完成"
return 0
}
# 创建用户并配置sudo
create_user_and_sudo() {
local host="$1"
echo "正在处理主机: $host"
# 检查主机是否可达
if ! execute_remote "$host" "echo '连接测试成功'" &> /dev/null; then
echo "错误: 无法连接到主机 $host"
return 1
fi
# 检查用户是否已存在
if execute_remote "$host" "id '$NEW_USER'" &> /dev/null; then
echo "警告: 用户 $NEW_USER 在主机 $host 上已存在"
else
# 创建用户
echo "创建用户: $NEW_USER"
execute_remote "$host" "
useradd -m -s /bin/bash '$NEW_USER' && \
echo '$NEW_USER:$NEW_PASSWORD' | chpasswd
" || {
echo "错误: 在主机 $host 上创建用户失败"
return 1
}
fi
# 配置sudo
echo "配置sudo权限"
execute_remote "$host" "
sudo_dir='/etc/sudoers.d'
sudo_file=\"\$sudo_dir/$NEW_USER\"
# 确保sudoers.d目录存在且有正确权限
if [ ! -d \"\$sudo_dir\" ]; then
mkdir -p \"\$sudo_dir\"
fi
# 创建sudo配置文件
echo '$NEW_USER ALL=(ALL) NOPASSWD: ALL' > \"\$sudo_file\" && \
chmod 440 \"\$sudo_file\" && \
visudo -c &> /dev/null
" || {
echo "错误: 在主机 $host 上配置sudo失败"
return 1
}
# 修改secure_path配置
if ! modify_secure_path "$host"; then
return 1
fi
echo "主机 $host 配置完成"
return 0
}
# 主函数
main() {
local success_hosts=()
local failed_hosts=()
# 检查依赖
check_dependencies
echo "开始批量配置..."
echo "目标主机: ${HOSTS[*]}"
echo "创建用户: $NEW_USER"
echo "配置免密sudo: 是"
echo "修改secure_path: 是"
echo "----------------------------------------"
for host in "${HOSTS[@]}"; do
if create_user_and_sudo "$host"; then
success_hosts+=("$host")
else
failed_hosts+=("$host")
fi
echo "----------------------------------------"
done
# 输出结果摘要
echo "配置完成!"
echo "成功的主机: ${success_hosts[*]}"
if [ ${#failed_hosts[@]} -gt 0 ]; then
echo "失败的主机: ${failed_hosts[*]}"
exit 1
else
echo "所有主机配置成功!"
fi
}
# 解析命令行参数
while [[ $# -gt 0 ]]; do
case $1 in
-u|--user)
USER="$2"
shift 2
;;
-p|--password)
PASSWORD="$2"
shift 2
;;
-n|--new-user)
NEW_USER="$2"
shift 2
;;
-w|--new-password)
NEW_PASSWORD="$2"
shift 2
;;
-h|--host)
IFS=',' read -ra HOSTS <<< "$2"
shift 2
;;
--help)
show_help
exit 0
;;
*)
echo "未知选项: $1"
show_help
exit 1
;;
esac
done
# 设置默认值
USER=${USER:-$DEFAULT_USER}
NEW_USER=${NEW_USER:-$DEFAULT_NEW_USER}
HOSTS=("${HOSTS[@]:-${DEFAULT_HOSTS[@]}}")
# 验证必需参数
if [ -z "$PASSWORD" ]; then
echo "错误: 必须指定SSH登录密码 (-p)"
show_help
exit 1
fi
if [ -z "$NEW_PASSWORD" ]; then
echo "错误: 必须指定新用户密码 (-w)"
show_help
exit 1
fi
# 运行主函数
main
执行脚本
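脚本保存为 user_setup.sh 并赋予执行权限后即可运行,可先用 --help 查看参数说明(示意):
[root@localhost ~]# chmod +x user_setup.sh
[root@localhost ~]# ./user_setup.sh --help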
[root@localhost ~]# ./user_setup.sh -u root -p "123456" -n deploy -w "123456" -h "11.0.1.31,11.0.1.32,11.0.1.33,11.0.1.34,11.0.1.35,11.0.1.36"
开始批量配置...
目标主机: 11.0.1.31 11.0.1.32 11.0.1.33 11.0.1.34 11.0.1.35 11.0.1.36
创建用户: deploy
配置免密sudo: 是
----------------------------------------
正在处理主机: 11.0.1.31
创建用户: deploy
配置sudo权限
主机 11.0.1.31 配置完成
----------------------------------------
正在处理主机: 11.0.1.32
创建用户: deploy
配置sudo权限
主机 11.0.1.32 配置完成
----------------------------------------
正在处理主机: 11.0.1.33
创建用户: deploy
配置sudo权限
主机 11.0.1.33 配置完成
----------------------------------------
正在处理主机: 11.0.1.34
创建用户: deploy
配置sudo权限
主机 11.0.1.34 配置完成
----------------------------------------
正在处理主机: 11.0.1.35
创建用户: deploy
配置sudo权限
主机 11.0.1.35 配置完成
----------------------------------------
正在处理主机: 11.0.1.36
创建用户: deploy
配置sudo权限
主机 11.0.1.36 配置完成
----------------------------------------
配置完成!
成功的主机: 11.0.1.31 11.0.1.32 11.0.1.33 11.0.1.34 11.0.1.35 11.0.1.36
所有主机配置成功!
配置免密登陆
# 配置普通用户免密登陆
[root@localhost ~]# su - deploy
[deploy@localhost ~]$ ssh-keygen -t rsa -b 4096 -C "deploy@example.com" -f ~/.ssh/id_rsa -N ""
Generating public/private rsa key pair.
Your identification has been saved in /home/deploy/.ssh/id_rsa.
Your public key has been saved in /home/deploy/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:WdmWLCMulO0nInUf/7cCn8Gti4Oxc35cjfRf0wsgKoM deploy@example.com
The key's randomart image is:
+---[RSA 4096]----+
| |
| o + . |
| + + * = |
| o + * B . |
| o o S + +..oo|
| E + + + . =.+=|
| o + + B *|
| + o.B oo|
| +oo.o. |
+----[SHA256]-----+
[deploy@localhost ~]$ vim iplist.txt
[deploy@localhost ~]$ cat iplist.txt
11.0.1.31
11.0.1.32
11.0.1.33
11.0.1.34
11.0.1.35
11.0.1.36
[deploy@localhost ~]$ for host in $(cat iplist.txt); do sshpass -p '123456' ssh-copy-id -o StrictHostKeyChecking=no 'deploy'@$host; done
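公钥分发完成后,可以用 BatchMode 批量确认免密登陆是否生效(示意,每台主机返回主机名即为成功):
[deploy@localhost ~]$ for host in $(cat iplist.txt); do ssh -o BatchMode=yes -o ConnectTimeout=5 deploy@$host hostname; done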
二、单 master 部署如下修改
1. 主机分组文件
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ vim hosts.ini
[all]
k8s-master1 ansible_host=11.0.1.31 ip=11.0.1.31
k8s-node1 ansible_host=11.0.1.34 ip=11.0.1.34
k8s-node2 ansible_host=11.0.1.35 ip=11.0.1.35
k8s-node3 ansible_host=11.0.1.36 ip=11.0.1.36
[all:vars]
ansible_port=22
ansible_user=k8s-deploy
ansible_become=yes
ansible_become_method=sudo
# 如果sudo需要密码,则取消如下注释
#ansible_become_password=123456
[master]
k8s-master1
[node]
k8s-node1
k8s-node2
k8s-node3
[etcd]
#etcd1
#etcd2
#etcd3
# 如果单 master ha 里面的可以注释掉了,避免产生警告信息
[ha]
#k8s-master1 ha_name=ha-master
#k8s-master2 ha_name=ha-backup
#k8s-master3 ha_name=ha-backup
#24小时token过期后添加node节点
[newnode]
[k8s:children]
master
node
newnode
2. 全局变量文件
根据您在 group_vars/all.yml 中定义的 keepalived 选项自动选择是否启用高可用部署,以下配置留空即为单 master 节点部署
nic: '' # 网关网卡
Virtual_Router_ID: '' # keepalived 唯一路由 ID
vip: '' # 高可用虚拟IP
api_vip_hosts: '' # 高可用集群通信域名
lb_port: '' # 负载均衡端口
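执行部署前,可以先确认这些 keepalived 相关变量确实留空(示意,变量名以 group_vars/all.yml 实际内容为准):
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ grep -E "^(nic|Virtual_Router_ID|vip|api_vip_hosts|lb_port):" group_vars/all.yml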
3. 执行部署
# 建议升级 Centos 内核
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible-playbook -i hosts.ini install_kernel.yml
# 使用 single 文件部署
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible-playbook -i hosts.ini single-master-deploy.yml
三、多 master 集群方式部署
1. 主机分组文件
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ vim hosts.ini
[all]
k8s-master1 ansible_host=11.0.1.31 ip=11.0.1.31
k8s-master2 ansible_host=11.0.1.32 ip=11.0.1.32
k8s-master3 ansible_host=11.0.1.33 ip=11.0.1.33
k8s-node1 ansible_host=11.0.1.34 ip=11.0.1.34
k8s-node2 ansible_host=11.0.1.35 ip=11.0.1.35
k8s-node3 ansible_host=11.0.1.36 ip=11.0.1.36
etcd1 ansible_host=11.0.1.31 ip=11.0.1.31
etcd2 ansible_host=11.0.1.32 ip=11.0.1.32
etcd3 ansible_host=11.0.1.33 ip=11.0.1.33
[all:vars]
ansible_port=22
ansible_user=k8s-deploy
ansible_become=yes
ansible_become_method=sudo
# 如果sudo需要密码,则取消如下注释
#ansible_become_password=123456
[master]
k8s-master1
k8s-master2
k8s-master3
[node]
k8s-node1
k8s-node2
[etcd]
etcd1
etcd2
etcd3
[ha]
k8s-master1 ha_name=ha-master
k8s-master2 ha_name=ha-backup
k8s-master3 ha_name=ha-backup
#24小时token过期后添加node节点
[newnode]
[k8s:children]
master
node
newnode
2. 全局变量文件
根据您在 group_vars/all.yml 中定义的 keepalived 选项自动选择是否启用高可用部署,填写以下配置即为多 master 节点部署
nic: 'ens160' # 网关网卡
Virtual_Router_ID: '51' # keepalived 唯一路由 ID
vip: '11.0.1.30' # 高可用虚拟IP
api_vip_hosts: 'apiserver.cluster.local' # 高可用集群通信域名
lb_port: '16443' # 负载均衡端口
3. 执行部署
# 建议升级 Centos 内核
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible-playbook -i hosts.ini install_kernel.yml
# 使用 multi 文件部署
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible-playbook -i hosts.ini multi-master-ha-deploy.yml
四、验证集群
1. 查看 etcd 集群状态
# 其实还是当前机器,因为我的 etcd 和 master 共用同一台机器
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ssh etcd1
Last login: Mon Apr 14 13:20:03 2025
[deploy@k8s-master1 ~]$ # etcd 节点基本信息
[deploy@k8s-master1 ~]$ sudo etcdctl --cacert=/etc/etcd/ssl/etcd-ca.pem --cert=/etc/etcd/ssl/etcd-server.pem --key=/etc/etcd/ssl/etcd-server-key.pem --endpoints="https://etcd1:2379,https://etcd2:2379,https://etcd3:2379" member list -w table
+------------------+---------+-------+------------------------+------------------------+------------+
| ID | STATUS | NAME | PEER ADDRS | CLIENT ADDRS | IS LEARNER |
+------------------+---------+-------+------------------------+------------------------+------------+
| 19057a4144bcd8c4 | started | etcd2 | https://11.0.1.32:2380 | https://11.0.1.32:2379 | false |
| 7467e635c43f67f4 | started | etcd1 | https://11.0.1.31:2380 | https://11.0.1.31:2379 | false |
| 8eb8e095642da063 | started | etcd3 | https://11.0.1.33:2380 | https://11.0.1.33:2379 | false |
+------------------+---------+-------+------------------------+------------------------+------------+
# etcd 集群各节点的健康状态
[deploy@k8s-master1 ~]$ sudo etcdctl --cacert=/etc/etcd/ssl/etcd-ca.pem --cert=/etc/etcd/ssl/etcd-server.pem --key=/etc/etcd/ssl/etcd-server-key.pem --endpoints="https://etcd1:2379,https://etcd2:2379,https://etcd3:2379" endpoint health --write-out=table
+--------------------+--------+-------------+-------+
| ENDPOINT | HEALTH | TOOK | ERROR |
+--------------------+--------+-------------+-------+
| https://etcd1:2379 | true | 11.55301ms | |
| https://etcd3:2379 | true | 10.722732ms | |
| https://etcd2:2379 | true | 8.544486ms | |
+--------------------+--------+-------------+-------+
# 查看 etcd 集群各节点的详细状态信息,比 endpoint health 提供更丰富的数据
[deploy@k8s-master1 ~]$ sudo etcdctl --cacert=/etc/etcd/ssl/etcd-ca.pem --cert=/etc/etcd/ssl/etcd-server.pem --key=/etc/etcd/ssl/etcd-server-key.pem --endpoints="https://etcd1:2379,https://etcd2:2379,https://etcd3:2379" endpoint status -w table
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | IS LEARNER | RAFT TERM | RAFT INDEX | RAFT APPLIED INDEX | ERRORS |
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
| https://etcd1:2379 | 7467e635c43f67f4 | 3.5.9 | 9.3 MB | false | false | 4 | 172130 | 172130 | |
| https://etcd2:2379 | 19057a4144bcd8c4 | 3.5.9 | 9.3 MB | true | false | 4 | 172130 | 172130 | |
| https://etcd3:2379 | 8eb8e095642da063 | 3.5.9 | 9.4 MB | false | false | 4 | 172130 | 172130 | |
+--------------------+------------------+---------+---------+-----------+------------+-----------+------------+--------------------+--------+
# 查看 ETCD 备份情况
[deploy@k8s-master1 ~]$ sudo systemctl status etcd-backup.timer
[deploy@k8s-master1 ~]$ sudo systemctl status etcd-backup.service
[deploy@k8s-master1 ~]$ sudo ls -lh /var/lib/etcd-backup/
[deploy@k8s-master1 ~]$ sudo ls -lh /var/lib/etcd-backup/ |tail -n 1
-rw------- 1 etcd etcd 8.9M Apr 14 13:00 etcd-snapshot-20250414-130000.db
[deploy@k8s-master1 ~]$ sudo ETCDCTL_API=3 etcdctl --cacert=/etc/etcd/ssl/etcd-ca.pem \
--cert=/etc/etcd/ssl/etcd-server.pem --key=/etc/etcd/ssl/etcd-server-key.pem \
--endpoints=https://etcd1:2379 snapshot status /var/lib/etcd-backup/etcd-snapshot-20250414-130000.db
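# 如需进一步验证备份文件可用,可把快照临时恢复到一个新目录看看能否生成数据(仅为示意;真正的集群恢复需先停止 etcd,并为每个节点指定 --name、--initial-cluster 等参数)
[deploy@k8s-master1 ~]$ sudo ETCDCTL_API=3 etcdctl snapshot restore /var/lib/etcd-backup/etcd-snapshot-20250414-130000.db --data-dir=/tmp/etcd-restore-test
[deploy@k8s-master1 ~]$ sudo ls /tmp/etcd-restore-test/member && sudo rm -rf /tmp/etcd-restore-test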
[deploy@k8s-master1 ~]$ logout
Connection to etcd1 closed.
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$
2. 检查 k8s 集群状态
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl get cs
Warning: v1 ComponentStatus is deprecated in v1.19+
NAME STATUS MESSAGE ERROR
scheduler Healthy ok
controller-manager Healthy ok
etcd-0 Healthy ok
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl get node -o wide
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
k8s-master1 Ready control-plane 14h v1.28.2 11.0.1.31 <none> CentOS Linux 7 (Core) 5.4.160-1.el7.elrepo.x86_64 containerd://1.7.25
k8s-master2 Ready control-plane 14h v1.28.2 11.0.1.32 <none> CentOS Linux 7 (Core) 5.4.160-1.el7.elrepo.x86_64 containerd://1.7.25
k8s-master3 Ready control-plane 14h v1.28.2 11.0.1.33 <none> CentOS Linux 7 (Core) 5.4.160-1.el7.elrepo.x86_64 containerd://1.7.25
k8s-node1 Ready <none> 14h v1.28.2 11.0.1.34 <none> CentOS Linux 7 (Core) 5.4.160-1.el7.elrepo.x86_64 containerd://1.7.25
k8s-node2 Ready <none> 14h v1.28.2 11.0.1.35 <none> CentOS Linux 7 (Core) 5.4.160-1.el7.elrepo.x86_64 containerd://1.7.25
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl label node {k8s-node1,k8s-node2} ingress/type=nginx
node/k8s-node1 labeled
node/k8s-node2 labeled
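# 该 label 用于调度 ingress-nginx controller(此处假设 ansible 部署的 DaemonSet 带有 ingress/type: nginx 的 nodeSelector);打完标签后可确认 controller Pod 已落在对应节点
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl -n ingress-nginx get pods -o wide | grep controller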
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl get pods -A
NAMESPACE NAME READY STATUS RESTARTS AGE
ingress-nginx ingress-nginx-admission-create-dd8z8 0/1 Completed 1 14h
ingress-nginx ingress-nginx-admission-patch-mml7t 0/1 Completed 2 14h
ingress-nginx ingress-nginx-controller-2qjpn 1/1 Running 0 6m14s
ingress-nginx ingress-nginx-controller-lwz4w 1/1 Running 0 5m52s
kube-system calico-kube-controllers-5678d86b78-zfvz8 1/1 Running 0 6m7s
kube-system calico-node-22fkm 1/1 Running 0 14h
kube-system calico-node-8kb75 1/1 Running 0 14h
kube-system calico-node-g85rv 1/1 Running 0 6m22s
kube-system calico-node-mr5ls 1/1 Running 0 6m22s
kube-system calico-node-tgmgq 1/1 Running 0 14h
kube-system coredns-f8dcdd6b5-b4vgf 1/1 Running 0 14h
kube-system coredns-f8dcdd6b5-wbl8s 1/1 Running 0 14h
kube-system kube-apiserver-k8s-master1 1/1 Running 4 14h
kube-system kube-apiserver-k8s-master2 1/1 Running 4 14h
kube-system kube-apiserver-k8s-master3 1/1 Running 2 14h
kube-system kube-controller-manager-k8s-master1 1/1 Running 5 14h
kube-system kube-controller-manager-k8s-master2 1/1 Running 4 14h
kube-system kube-controller-manager-k8s-master3 1/1 Running 2 14h
kube-system kube-proxy-4qf79 1/1 Running 0 14h
kube-system kube-proxy-btp8t 1/1 Running 0 14h
kube-system kube-proxy-w6zzp 1/1 Running 0 14h
kube-system kube-proxy-zclj7 1/1 Running 0 14h
kube-system kube-proxy-zv7z8 1/1 Running 0 14h
kube-system kube-scheduler-k8s-master1 1/1 Running 5 14h
kube-system kube-scheduler-k8s-master2 1/1 Running 4 14h
kube-system kube-scheduler-k8s-master3 1/1 Running 2 14h
openebs openebs-localpv-provisioner-6787b599b9-c7cxd 1/1 Running 0 14h
openebs openebs-ndm-cluster-exporter-7bfd5746f4-jkcnw 1/1 Running 0 14h
openebs openebs-ndm-j7p9n 1/1 Running 0 14h
openebs openebs-ndm-mclk2 1/1 Running 0 14h
openebs openebs-ndm-node-exporter-845g7 1/1 Running 0 14h
openebs openebs-ndm-node-exporter-jf9n5 1/1 Running 1 (14h ago) 14h
openebs openebs-ndm-operator-845b8858db-57qkj 1/1 Running 0 14h
# 如果 pod 一直 Pending 或者 node 无法 Ready(已知 BUG),重启 containerd 即可
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini k8s -m shell -a "systemctl restart containerd"
# 创建服务
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl create deployment nginx --image=harbor.meta42.indc.vnet.com/library/nginx:latest --replicas=4
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl expose deployment nginx --port=80 --target-port=80 --type=NodePort
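# 创建完成后可查看分配到的 NodePort 并从节点验证(示意,以 k8s-node1 的 IP 为例)
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl get svc nginx
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ NODE_PORT=$(sudo kubectl get svc nginx -o jsonpath='{.spec.ports[0].nodePort}')
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ curl -I http://11.0.1.34:${NODE_PORT}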
五、配置 ingress 7 层代理
1. ha 节点 (master 节点) 修改配置
# 3 台 master 机器相同配置
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo vim /etc/nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;
include /usr/share/nginx/modules/*.conf;
events {
worker_connections 1024;
}
# HTTP负载均衡配置
http {
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'Status: $status BodyBytesSent: $body_bytes_sent '
'Referer: "$http_referer" '
'UserAgent: "$http_user_agent" '
'XForwardedFor: "$http_x_forwarded_for" '
'Upgrade: $http_upgrade Connection: $http_connection '
'Host: $http_host '
'CacheStatus: $upstream_cache_status '
'RequestTime: $request_time';
access_log /var/log/nginx/access.log main;
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
include /etc/nginx/mime.types;
include /etc/nginx/conf.d/*.conf;
default_type application/octet-stream;
client_max_body_size 102400m;
# HTTP Ingress-Nginx负载均衡
upstream ingress-nginx-http {
server 11.0.1.33:80; # 选择 ingress 所在的节点
server 11.0.1.34:80; # 选择 ingress 所在的节点
}
# HTTPS Ingress-Nginx负载均衡
upstream ingress-nginx-https {
server 11.0.1.33:443; # 选择 ingress 所在的节点
server 11.0.1.34:443; # 选择 ingress 所在的节点
}
# HTTP负载均衡
server {
listen 80;
location / {
proxy_pass http://ingress-nginx-http; # 指定 HTTP 上游
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
# HTTPS负载均衡
server {
listen 443 ssl;
ssl_certificate /etc/nginx/cret/nginx/*.linuxtian.com.crt;
ssl_certificate_key /etc/nginx/cret/nginx/*.linuxtian.com.key;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers 'TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-RSA-AES256-GCM-SHA384';
ssl_prefer_server_ciphers on;
location / {
proxy_pass https://ingress-nginx-https; # 指定 HTTPS 上游
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
}
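配置完成并 reload nginx 后,可以创建一个测试 Ingress,再通过 VIP(或任一 master 的 80 端口)验证整条 7 层链路。以下仅为示意:demo.linuxtian.com 为示例域名,--class=nginx 以集群中实际的 IngressClass 名称为准。
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl create ingress nginx-demo --class=nginx --rule="demo.linuxtian.com/*=nginx:80"
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ curl -H "Host: demo.linuxtian.com" http://11.0.1.30/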
六、其他相关配置
1. 调整 kube 启动参数 (可选修改)
# 请手动执行以下命令来修改 kube 自定义配置:
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo sed -i "/image:/i\    - --feature-gates=RemoveSelfLink=false" /etc/kubernetes/manifests/kube-apiserver.yaml # 每台 master 都执行
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo sed -i "s/bind-address=127.0.0.1/bind-address=0.0.0.0/g" /etc/kubernetes/manifests/kube-controller-manager.yaml # 每台 master 都执行
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl get cm -n kube-system kube-proxy -o yaml | sed "s/metricsBindAddress: \"\"/metricsBindAddress: \"0.0.0.0\"/g" | sudo kubectl replace -f -
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo kubectl rollout restart daemonset -n kube-system kube-proxy
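# 可顺手确认监听地址已放开(示意:10257 为 kube-controller-manager、10249 为 kube-proxy metrics 端口,具体端口以实际版本为准)
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ sudo ss -lntp | grep -E '10257|10249'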
2. 解决 node 节点报错
Sep 14 00:59:22 k8s-node1 kubelet[1611]: E0914 00:59:22.040084 1611 file_linux.go:61] "Unable to read config path" err="path does not exist, ignoring" path="/etc/kubernetes/manifests"
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini node -m shell -a "mkdir -pv /etc/kubernetes/manifests"
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini node -m shell -a "systemctl restart kubelet"
七、k8s 节点平滑扩容
1. ip 列表新增机器
# 新增机器的 IP 地址
[root@k8s-master1 Centos7-ansible-k8s-kubeadm-on-line-deploy-main]# cat iplist.txt
11.0.1.31
11.0.1.32
11.0.1.33
11.0.1.34
11.0.1.35
11.0.1.36 # 新增
11.0.1.37 # 新增
11.0.1.38 # 新增
2. 配置免密登陆
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ for host in $(cat iplist.txt); do sshpass -p 'your_password' ssh-copy-id -o StrictHostKeyChecking=no 'deploy'@$host; done
3. 主机分组新增机器
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ vim hosts.ini
# 全部分组下面新增3个
[all]
k8s-node-3 ansible_host=11.0.1.36 ip=11.0.1.36 ansible_port=22 ansible_user=deploy
k8s-node-4 ansible_host=11.0.1.37 ip=11.0.1.37 ansible_port=22 ansible_user=deploy
k8s-node-5 ansible_host=11.0.1.38 ip=11.0.1.38 ansible_port=22 ansible_user=deploy
# k8s 分组下面新增3个
[k8s]
k8s-node-3
k8s-node-4
k8s-node-5
# newnode 分组里面也新增3个
[newnode]
k8s-node-3
k8s-node-4
k8s-node-5
4. 测试是否能用
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini newnode -m shell -a "whoami"
5. 升级内核 (可选操作)
# --limit 指定 newnode 分组,默认文件里面应该是 k8s 分组,所以我们只希望升级新添加机器的内核
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible-playbook -i hosts.ini --limit newnode install_kernel.yml
6. 执行操作
# 指向 add 文件
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible-playbook -i hosts.ini add-node.yml
7. 解决 node 节点报错
Sep 14 00:59:22 k8s-node1 kubelet[1611]: E0914 00:59:22.040084 1611 file_linux.go:61] "Unable to read config path" err="path does not exist, ignoring" path="/etc/kubernetes/manifests"
# 选择 newnode 分组,因为这个里面是新增的机器
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini newnode -m shell -a "mkdir -pv /etc/kubernetes/manifests"
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini newnode -m shell -a "systemctl restart kubelet"
八、其他操作
1. 配置 harbor 证书
# 拷贝 harbor 证书文件,当然 ansible 中是没有的,需要后期自己部署 harbor,只是为了使用 ansible 统一集群配置
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini k8s -m copy -a "src=/etc/containerd/certs.d/ dest=/etc/containerd/certs.d mode=0755" --become
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible -i hosts.ini k8s -m copy -a "src=/etc/docker/certs.d/ dest=/etc/docker/certs.d mode=0755" --become
2. 安装低版本 k8s 使用 docker 不使用 containerd
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ vim multi-master-ha-deploy.yml
- name: 3.部署Docker # 把原本的修改为 docker
gather_facts: true
hosts: k8s
roles:
- docker
tags: docker
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ vim group_vars/all.yml
code_version: 'v1.23.0' # 修改为 1.23.0
kube_version: '1.23.0' # 修改为 1.23.0
k8s_version: 'v1.23.0' # 修改为 1.23.0
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ vim roles/master/templates/kubeadm-init.conf.j2
#criSocket: unix:///run/containerd/containerd.sock # 使用 containerd 作为 CRI k8s 与容器的通信套接字
criSocket: /var/run/dockershim.sock # 使用 dockershim 作为 CRI k8s 与容器的通信套接字
# 执行部署
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ ansible-playbook -i hosts.ini multi-master-ha-deploy.yml
3. 更换安装版本
3.1 准备 RPM 包
挑选一台可以上外网的机器下载对应版本的安装 RPM
# 安装创建 YUM/DNF 软件仓库元数据的命令
[root@localhost ~]# yum -y install createrepo
# 下载指定版本的 RPM
[root@localhost ~]# mkdir -pv ./kubernetes-1.32.2
[root@localhost ~]# yum -y reinstall --downloadonly kubelet-1.32.2 kubeadm-1.32.2 kubectl-1.32.2 --downloaddir=./kubernetes-1.32.2
# 创建索引
[root@localhost ~]# createrepo ./kubernetes-1.32.2
3.2 准备 kubeadm
修改 kubeadm 证书过期时间
[root@localhost kubernetes-1.32.2]# wget https://ghfast.top/https://github.com/kubernetes/kubernetes/archive/v1.32.2.tar.gz
[root@localhost kubernetes-1.32.2]# tar xvf v1.32.2.tar.gz && cd kubernetes-1.32.2
# 修改 CA 证书有效期
[root@k8s-master1 kubernetes-1.32.2]# vim staging/src/k8s.io/client-go/util/cert/cert.go
// NewSelfSignedCACert creates a CA certificate
func NewSelfSignedCACert(cfg Config, key crypto.Signer) (*x509.Certificate, error) {
    now := time.Now()
    tmpl := x509.Certificate{
        SerialNumber: new(big.Int).SetInt64(0),
        Subject: pkix.Name{
            CommonName:   cfg.CommonName,
            Organization: cfg.Organization,
        },
        DNSNames:  []string{cfg.CommonName},
        NotBefore: now.UTC(),
        NotAfter:  now.Add(duration365d * 100).UTC(), ####### 这里原本为 10,修改为 100
        KeyUsage:  x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature | x509.KeyUsageCertSign,
        BasicConstraintsValid: true,
        IsCA:                  true,
    }
# 修改证书有效期
[root@localhost kubernetes-1.32.2]# vim cmd/kubeadm/app/constants/constants.go
const (
    // KubernetesDir is the directory Kubernetes owns for storing various configuration files
    KubernetesDir = "/etc/kubernetes"
    // ManifestsSubDirName defines directory name to store manifests
    ManifestsSubDirName = "manifests"
    // TempDirForKubeadm defines temporary directory for kubeadm
    // should be joined with KubernetesDir.
    TempDirForKubeadm = "tmp"

    // CertificateValidity defines the validity for all the signed certificates generated by kubeadm
    CertificateValidity = time.Hour * 24 * 365 * 100 ###### 这里原本为 24 * 365 修改为 24 * 365 * 100

    // DefaultCertificateDir defines default certificate directory
    DefaultCertificateDir = "pki"
开始编译(Go 环境省略安装)
[root@localhost kubernetes-1.32.2]# make all WHAT=cmd/kubeadm GOFLAGS=-v
[root@localhost kubernetes-1.32.2]# ls -l _output/bin/kubeadm
-rwxr-xr-x. 1 root root 44511232 Apr 2 19:46 _output/bin/kubeadm
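# 编译完成后可先确认产物版本是否为 v1.32.2(示意)
[root@localhost kubernetes-1.32.2]# _output/bin/kubeadm version -o short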
3.3 准备 k8s 组件镜像
# 镜像列表如下:
registry.aliyuncs.com/google_containers/kube-apiserver:v1.32.2
registry.aliyuncs.com/google_containers/kube-controller-manager:v1.32.2
registry.aliyuncs.com/google_containers/kube-scheduler:v1.32.2
registry.aliyuncs.com/google_containers/kube-proxy:v1.32.2
registry.aliyuncs.com/google_containers/pause:3.10
registry.aliyuncs.com/google_containers/etcd:3.5.12-0
registry.aliyuncs.com/google_containers/coredns/coredns:v1.11.1
# 拉取镜像
[root@localhost ~]# for img in \
"kube-apiserver:v1.32.2" \
"kube-controller-manager:v1.32.2" \
"kube-scheduler:v1.32.2" \
"kube-proxy:v1.32.2" \
"pause:3.10" \
"etcd:3.5.12-0" \
"coredns/coredns:v1.11.1"; do \
docker pull "registry.aliyuncs.com/google_containers/${img}"; \
done
# 修改 tag,必须是 192.168.248.40:5000
[root@localhost ~]# for img in \
"kube-apiserver:v1.32.2" \
"kube-controller-manager:v1.32.2" \
"kube-scheduler:v1.32.2" \
"kube-proxy:v1.32.2" \
"pause:3.10" \
"etcd:3.5.12-0" \
"coredns/coredns:v1.11.1"; do \
docker tag "registry.aliyuncs.com/google_containers/${img}" "192.168.248.40:5000/google_containers/$(basename ${img})"; \
done
# save 打包镜像,必须是 192.168.248.40:5000
[root@localhost ~]# for img in \
"kube-apiserver:v1.32.2" \
"kube-controller-manager:v1.32.2" \
"kube-scheduler:v1.32.2" \
"kube-proxy:v1.32.2" \
"pause:3.10" \
"etcd:3.5.12-0" \
"coredns:v1.11.1"; do \
docker save "192.168.248.40:5000/google_containers/${img}" | gzip > "kube-$(echo ${img} | cut -d':' -f1)-$(echo ${img} | cut -d':' -f2).tar.gz"; \
done
准备所对应版本的 calico 镜像
# 镜像列表
docker.io/calico/cni:v3.29.0
docker.io/calico/node:v3.29.0
docker.io/calico/kube-controllers:v3.29.0
# 拉取镜像
[root@localhost ~]# for img in \
"cni:v3.29.0" \
"node:v3.29.0" \
"kube-controllers:v3.29.0"; do \
docker pull "docker.io/calico/${img}"; \
done
# 修改 tag,必须是 192.168.248.40:5000
[root@localhost ~]# for img in \
"cni:v3.29.0" \
"node:v3.29.0" \
"kube-controllers:v3.29.0"; do \
docker tag "docker.io/calico/${img}" "192.168.248.40:5000/calico/$(basename ${img})"; \
done
# save 打包镜像,必须是 192.168.248.40:5000
[root@localhost ~]# for img in \
"cni:v3.29.0" \
"node:v3.29.0" \
"kube-controllers:v3.29.0"; do \
docker save "192.168.248.40:5000/calico/${img}" | gzip > "calico-$(echo ${img} | cut -d':' -f1)-$(echo ${img} | cut -d':' -f2).tar.gz"; \
done
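# 传输前可抽查某个镜像包能否正常 load(示意,文件名来自上面 save 循环的命名规则)
[root@localhost ~]# docker load -i kube-kube-apiserver-v1.32.2.tar.gz
[root@localhost ~]# docker images | grep kube-apiserver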
3.4 整合离线文件
将外网机器中准备好的 RPM 文件、docker 镜像文件、kubeadm 二进制命令上传到离线环境中……过程略去
# 先将 ansible 目录中的 mirrors.tar.gz 解压
[root@k8s-master1 Centos7-ansible-k8s-containerd-2025-04-14]# ls roles/init/files/
ansible ipvs.modules mirrors.tar.gz nginx-all
[root@k8s-master1 Centos7-ansible-k8s-containerd-2025-04-14]# tar xvf mirrors.tar.gz
# 解压出来如下
[root@k8s-master1 files]# ls
ansible docker-ce-20.10.9 ipvs.modules kubernetes-1.23.0 kubernetes-1.28.2 local-rpm mirrors.tar.gz nginx-all
# 整合 RPM 到 ansible 中
$ cp -ra ./kubernetes-1.32.2 roles/init/files/
[root@k8s-master1 files]# ls
ansible docker-ce-20.10.9 ipvs.modules kubernetes-1.23.0 kubernetes-1.28.2 kubernetes-1.32.2 local-rpm mirrors.tar.gz nginx-all
# 重新压缩
[root@k8s-master1 files]# tar zcvf mirrors.tar.gz docker-ce-20.10.9 kubernetes-1.23.0 kubernetes-1.28.2 kubernetes-1.32.2 local-rpm
# 删除无用包
[root@k8s-master1 files]# rm -rf docker-ce-20.10.9 kubernetes-1.23.0 kubernetes-1.28.2 kubernetes-1.32.2 local-rpm
# 修改 repo 文件
[root@k8s-master1 Centos7-ansible-k8s-containerd-2025-04-14]# cd roles/init/templates/
[root@k8s-master1 templates]# vim CentOS-local.repo.j2
[base]
name=base
baseurl=http://{{ registry_address }}:6789/local-rpm
gpgcheck=0
enabled=1
[docker-ce-20.10.9]
name=docker-ce-20.10.9
baseurl=http://{{ registry_address }}:6789/docker-ce-20.10.9
gpgcheck=0
enabled=1
[kubernetes-1.23.0]
name=kubernetes-1.23.0
baseurl=http://{{ registry_address }}:6789/kubernetes-1.23.0
gpgcheck=0
enabled=1
[kubernetes-1.28.2]
name=kubernetes-1.28.2
baseurl=http://{{ registry_address }}:6789/kubernetes-1.28.2
gpgcheck=0
enabled=1
[kubernetes-1.32.2]
name=kubernetes-1.32.2
baseurl=http://{{ registry_address }}:6789/kubernetes-1.32.2
gpgcheck=0
enabled=1
拷贝 docker 镜像
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ cp *.gz roles/registry/files/
拷贝准备好的 kubeadm
# 至于是否备份保存原有的 kubeadm 看自己
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ cp kubeadm roles/master/files/kubeadm-1.32.2
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ cp kubeadm roles/joinmaster/files/kubeadm-1.32.2
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ cp kubeadm roles/node/files/kubeadm-1.32.2
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ cp kubeadm roles/single-master/files/kubeadm-1.32.2
3.5 修改变量文件
离线包至此准备完毕,现在修改变量文件
# 将里面修改为对应的版本即可
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ grep '1.28.2' group_vars/all.yml
code_version: 'v1.28.2'
kube_version: '1.28.2'
k8s_version: 'v1.28.2'
[deploy@k8s-master1 Centos7-ansible-k8s-containerd-20250413]$ vim group_vars/all.yml
code_version: 'v1.32.2'
kube_version: '1.32.2'
k8s_version: 'v1.32.2'
pause_image: '11.0.1.31:5000/google_containers/pause:3.10' # 更换所对应的
k8s_calico_version_map: # 定义所对应的 1.32.2 版本使用的 calico
  "1.32": "v3.29.0"
  "1.28": "v3.28.0"
  "1.27": "v3.27.0"
  "1.26": "v3.26.0"
  "1.25": "v3.25.0"
  "1.24": "v3.24.0"
  "1.23": "v3.24.0"
  "1.22": "v3.24.0"
k8s_ingress_version_map:
  "1.29": "v1.10.0"
  "1.28": "v1.9.5"
  "1.27": "v1.9.5"
  "1.26": "v1.9.5"
  "1.25": "v1.5.1"
  "1.24": "v1.5.1"
  "1.23": "v1.5.1"
  "1.22": "v1.5.1"
default_calico_version: "v3.25.0"
default_ingress_version: "v1.5.1"
九、卸载删除集群
1. 卸载 playbook
[k8s-deploy@k8s-master1 Centos7-ansible-k8s-containerd-202501031]$ cat remove-k8s.yml
---
- hosts: k8s
gather_facts: False
tags: always
tasks:
- name: 停止所有相关服务
systemd:
name: "{{ item }}"
state: stopped
loop:
- kubelet
- containerd
- docker
register: stop_result
ignore_errors: yes
failed_when:
- stop_result.failed
- "'Could not find the requested service' not in stop_result.msg"
- "'Unit not found' not in stop_result.msg"
tags: ['service-stop', 'k8s', 'containerd', 'docker']
- hosts: k8s
gather_facts: False
tags: k8s
tasks:
- name: 移除k8s集群配置
shell: |
if command -v kubeadm >/dev/null 2>&1; then
kubeadm reset -f >/dev/null 2>&1
echo "reset_completed"
else
echo "kubeadm_not_found"
fi
args:
warn: false
register: reset_result
changed_when: "'reset_completed' in reset_result.stdout"
failed_when: false
tags: ['k8s-reset']
- name: 显示重置结果
debug:
msg: "{{ 'kubeadm reset 执行成功' if 'reset_completed' in reset_result.stdout else 'kubeadm 不存在,跳过重置' }}"
tags: ['k8s-reset']
- name: 卸载 k8s 相关软件
package:
name:
- kubelet
- kubeadm
- kubectl
state: absent
ignore_errors: yes
tags: ['k8s-packages']
- name: 干掉可能存在的 kube 相关进程
shell: |
pids=$(ps -ef | grep -E 'kube|etcd' | grep -v grep | awk '{print $2}')
if [ -n "$pids" ]; then
kill -9 $pids
fi
args:
warn: false
ignore_errors: yes
tags: ['k8s-processes']
- name: 删除 kubelet 数据
file:
path: "{{ kubelet_data_dir }}"
state: absent
ignore_errors: yes
tags: ['k8s-data']
- name: 删除 k8s 配置文件
file:
path: "{{ item }}"
state: absent
loop:
- /etc/kubernetes/
- /root/.kube/config
- "{{ etcd_data }}"
- "{{ etcd_conf }}"
- /var/run/kubernetes
ignore_errors: yes
tags: ['k8s-config']
- hosts: k8s
gather_facts: False
tags: containerd
tasks:
- name: 清理 containerd 挂载点
shell: |
# 清理容器挂载点
mount | grep -E 'containerd|kubelet' | awk '{print $3}' | xargs -I {} umount -lf {} || true
# 清理 shm 挂载
mount | grep '/run/containerd' | awk '{print $3}' | xargs -I {} umount -lf {} || true
args:
warn: false
ignore_errors: yes
tags: ['containerd-mounts']
- name: 删除 containerd 相关文件
file:
path: "{{ item }}"
state: absent
loop:
- "/etc/containerd"
- "{{ containerd_data }}"
- /run/containerd
- /var/run/containerd
ignore_errors: yes
tags: ['containerd-data']
- name: 删除 containerd 二进制文件
file:
path: "/usr/bin/{{ item }}"
state: absent
loop:
- containerd
- containerd-shim
- containerd-shim-runc-v1
- containerd-shim-runc-v2
- containerd-stress
- crictl
- critest
- ctd-decoder
- ctr
- runc
ignore_errors: yes
tags: ['containerd-binaries']
- name: 删除 systemd containerd 服务
file:
path: /lib/systemd/system/containerd.service
state: absent
ignore_errors: yes
tags: ['containerd-service']
- hosts: k8s
gather_facts: False
tags: docker
tasks:
- name: 卸载 docker 相关软件
package:
name:
- docker-ce
- docker-ce-cli
- docker-compose-plugin
state: absent
ignore_errors: yes
tags: ['docker-packages']
- name: 删除 docker 数据
file:
path: "{{ docker_data_dir }}"
state: absent
ignore_errors: yes
tags: ['docker-data']
- hosts: k8s
gather_facts: False
tags: network
tasks:
- name: 清理 CNI 网络配置
file:
path: "{{ item }}"
state: absent
loop:
- /var/lib/cni
- /etc/cni/net.d
ignore_errors: yes
tags: ['cni-config']
- name: 清理 iptables 规则
shell: |
iptables -F
iptables -X
iptables -t nat -F
iptables -t nat -X
iptables -t mangle -F
iptables -t mangle -X
iptables -P FORWARD ACCEPT
args:
warn: false
ignore_errors: yes
tags: ['iptables']
- name: 清理 CNI 网络接口
shell: |
ip link | grep -E 'cni|flannel|vxlan|calico|tunl' | awk -F: '{print $2}' | xargs -I{} ip link delete {} || true
args:
warn: false
ignore_errors: yes
tags: ['network-interfaces']
- hosts: k8s
gather_facts: False
tags: system
tasks:
- name: 开启swap
replace:
path: /etc/fstab
regexp: '^#/(.*) swap (.*)'
replace: '/\1 swap \2'
ignore_errors: yes
tags: ['swap']
- name: 删除临时文件
file:
path: "{{ tmp_dir }}"
state: absent
ignore_errors: yes
tags: ['temp-files']
- name: 重新加载 systemd
command: systemctl daemon-reload
ignore_errors: yes
tags: ['systemd']
- hosts: ha
gather_facts: false
tags: ha
tasks:
- name: 停止负载均衡服务
systemd:
name: "{{ item }}"
state: stopped
loop:
- nginx
- keepalived
- haproxy
ignore_errors: yes
tags: ['ha-service-stop']
- name: 卸载高可用负载均衡服务
package:
name:
- nginx
- keepalived
- haproxy
state: absent
ignore_errors: yes
tags: ['ha-packages']
- name: 删除负载均衡配置文件
file:
path: "{{ item }}"
state: absent
loop:
- /etc/nginx
- /etc/keepalived
- /etc/haproxy
ignore_errors: yes
tags: ['ha-config']
#- hosts: k8s
# become: yes
# gather_facts: false
# tags: reboot
# tasks:
# - name: 1分钟后重启k8s服务器
# shell: "shutdown -r 1"
# tags: ['reboot']
2. 执行 playbook
[k8s-deploy@k8s-master1 Centos7-ansible-k8s-containerd-202501031]$ ansible-playbook -i hosts.ini remove-k8s.yml
PLAY RECAP **************************************************************************************************************************************************************************************************************************************************
k8s-master1 : ok=22 changed=20 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
k8s-master2 : ok=22 changed=18 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
k8s-master3 : ok=22 changed=18 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
k8s-node1 : ok=19 changed=10 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
k8s-node2 : ok=19 changed=10 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
k8s-node3 : ok=19 changed=10 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0
# 只清理 Kubernetes 相关
ansible-playbook remove-k8s.yml --tags "k8s"
# 只清理 containerd
ansible-playbook remove-k8s.yml --tags "containerd"
# 只清理 docker
ansible-playbook remove-k8s.yml --tags "docker"
# 只清理网络配置
ansible-playbook remove-k8s.yml --tags "network"
# 只清理高可用组件
ansible-playbook remove-k8s.yml --tags "ha"
# 只清理系统配置
ansible-playbook remove-k8s.yml --tags "system"
# 清理所有(默认行为)
ansible-playbook remove-k8s.yml
# 组合多个标签
ansible-playbook remove-k8s.yml --tags "k8s,docker"
十、实验录屏
Centos7 录屏
Ubuntu-22.04.2 录屏