强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

SSH 服务器完全指南 / 第12章 自动化运维集成

第12章 自动化运维集成

12.1 自动化基础:无交互式 SSH

最佳方式:SSH 密钥认证

# 1. Create a dedicated, passphrase-less key pair for the automation user
key=~/.ssh/id_automation
ssh-keygen -t ed25519 -f "$key" -N "" -C "automation@ci-server"

# 2. Install the public key on every target server (web01 .. web10)
for n in {01..10}; do
    ssh-copy-id -i "${key}.pub" "deploy@web${n}"
done

# 3. Select the key explicitly with -i
ssh -i "$key" deploy@server "uptime"
# ~/.ssh/config
# Settings shared by every alias that starts with "automation-".
Host automation-*
    User deploy
    IdentityFile ~/.ssh/id_automation
    # Trust a host key on first contact, refuse the connection if it later changes.
    StrictHostKeyChecking accept-new
    # Never prompt: if key authentication fails, exit immediately.
    # (The comment sits on its own line because older OpenSSH releases may
    # reject text that follows a value on the same line.)
    BatchMode yes

Host automation-web01
    HostName 10.0.0.10

Host automation-web02
    HostName 10.0.0.11

12.2 sshpass:密码自动化

使用场景

在无法使用密钥认证的临时场景下:

# Install sshpass
sudo apt install sshpass

# -p: password on the command line (it shows up in the process list; avoid)
sshpass -p 'mypassword' ssh user@server "uptime"

# -f: read the password from a file (somewhat safer).
# The file is created under a restrictive umask so that it is never readable
# by other users, not even between its creation and the chmod below.
(umask 077 && printf '%s\n' 'mypassword' > ~/.ssh/.pass)
# Still needed when the file already existed with looser permissions.
chmod 600 ~/.ssh/.pass
sshpass -f ~/.ssh/.pass ssh user@server "uptime"

# -e: read the password from the SSHPASS environment variable
export SSHPASS='mypassword'
sshpass -e ssh user@server "uptime"

批量脚本

#!/bin/bash
# batch-exec.sh - run one command on a fixed list of servers, one at a time.
#
# Usage:   ./batch-exec.sh <command> [args...]
# Auth:    sshpass reads the password from ~/.ssh/.pass
# Returns: 0 when the command succeeded on every server, 1 otherwise.

SERVERS="web01 web02 web03"
COMMAND="$*"

# With no command, ssh would try to open an interactive login shell on each host.
if [[ -z "$COMMAND" ]]; then
    echo "Usage: $0 <command> [args...]" >&2
    exit 2
fi

failed=0
# SERVERS is deliberately left unquoted: it is a space-separated list.
for server in $SERVERS; do
    echo "--- $server ---"
    if ! sshpass -f ~/.ssh/.pass ssh "deploy@$server" "$COMMAND"; then
        failed=1
    fi
done

exit "$failed"

sshpass 安全警告

⚠️ 安全风险: 使用 -p 时,密码会作为命令行参数出现在进程列表中;-f 和 -e 虽然不经过命令行,但密码仍以明文保存在文件或环境变量里。sshpass 仅适用于无法使用密钥认证的临时场景,生产环境强烈推荐使用 SSH 密钥认证。


12.3 expect:自动化交互

基本用法

# Install expect
sudo apt install expect
#!/usr/bin/expect -f
# login.exp - log in over SSH with a password, run `uptime`, then log out.
#
# Usage: ./login.exp <host> <user> <password>
# NOTE: a password passed as an argument is visible in the process list;
#       use this only with throwaway credentials.

if {[llength $argv] < 3} {
    puts stderr "Usage: $argv0 <host> <user> <password>"
    exit 2
}

set timeout 30
set host [lindex $argv 0]
set user [lindex $argv 1]
set password [lindex $argv 2]

# A shell prompt ends in "$ " for ordinary users and in "# " for root.
set prompt {[$#] }

# accept-new: no yes/no question for an unknown host, but a changed host key
# is still refused.
spawn ssh -o StrictHostKeyChecking=accept-new $user@$host

# Step 1: wait for the password prompt and answer it.
expect {
    "password:" {
        # "--" keeps a password that starts with "-" from being read as a flag.
        send -- "$password\r"
    }
    timeout {
        puts "Connection timed out"
        exit 1
    }
    eof {
        puts "Connection closed before the password prompt"
        exit 1
    }
}

# Step 2: a shell prompt means the login worked; a second password prompt
# means the password was rejected.
expect {
    -re $prompt {
        send "uptime\r"
    }
    "password:" {
        puts "Authentication failed"
        exit 1
    }
    timeout {
        puts "Timed out waiting for the shell prompt"
        exit 1
    }
    eof {
        puts "Connection closed during login"
        exit 1
    }
}

# Step 3: wait for the prompt that follows the uptime output, then log out.
expect {
    -re $prompt {
        send "exit\r"
    }
    timeout {
        puts "Timed out waiting for uptime to finish"
        exit 1
    }
}
expect eof
# Usage
chmod +x login.exp
./login.exp 192.168.1.100 admin mypassword

expect 脚本模式

#!/usr/bin/expect -f
# deploy.exp - unlock the SSH key, pull the latest code and restart the
# containers on one server.
#
# Usage: ./deploy.exp <server>

# Tcl has no $(...) command substitution, so the dated file name is built
# with the clock command instead of calling date.
log_file "/var/log/deploy-[clock format [clock seconds] -format %Y%m%d].log"

set timeout 60
set server [lindex $argv 0]

spawn ssh deploy@$server

# The private key is passphrase-protected; answer the unlock prompt.
# NOTE(review): the passphrase below is a placeholder - do not commit a real one.
expect "passphrase"
send "mykeypassphrase\r"

# Wait for the shell prompt before each command so that the commands run
# strictly one after another.
expect "$ "
send "cd /opt/app && git pull\r"
expect "$ "

send "docker-compose up -d\r"
expect "$ "

send "exit\r"
expect eof

expect vs sshpass 对比

特性 | expect | sshpass
安装复杂度
使用复杂度
灵活性 | 高(可处理任意交互) | 低(仅密码)
应用场景 | 复杂交互、SSH 密钥密码 | 仅密码认证
推荐程度⭐⭐⭐⭐

12.4 批量管理脚本

简易并行执行器

#!/bin/bash
# parallel-exec.sh - run one command on every server listed in servers.txt,
# at most MAX_PARALLEL at a time, writing each server's output to its own log.
#
# Usage:       ./parallel-exec.sh <command> [args...]
# servers.txt: one target per line, "user@host" or "user@host:port";
#              blank lines and lines starting with '#' are skipped.

SERVERS_FILE="servers.txt"
COMMAND="$*"
MAX_PARALLEL=10
LOG_DIR="/var/log/parallel-exec"

# With no command, ssh would try to open an interactive shell on every host.
if [[ -z "$COMMAND" ]]; then
    echo "Usage: $0 <command> [args...]" >&2
    exit 2
fi

if [[ ! -r "$SERVERS_FILE" ]]; then
    echo "Cannot read $SERVERS_FILE" >&2
    exit 1
fi

mkdir -p "$LOG_DIR" || exit 1

echo "=== 批量执行 ==="
echo "服务器文件: $SERVERS_FILE"
echo "执行命令: $COMMAND"
echo ""

# "|| [[ -n ... ]]" also processes a final line that has no trailing newline.
while IFS= read -r server || [[ -n "$server" ]]; do
    server="${server%$'\r'}"    # tolerate CRLF line endings
    [[ -z "$server" || "$server" == \#* ]] && continue

    # ssh does not parse "host:port"; split a trailing numeric port off and
    # pass it with -p. Entries containing more than one colon (for example
    # IPv6 literals) are passed through unchanged.
    target="$server"
    port_opt=()
    if [[ "$server" =~ ^([^:]+):([0-9]+)$ ]]; then
        target="${BASH_REMATCH[1]}"
        port_opt=(-p "${BASH_REMATCH[2]}")
    fi

    (
        LOG_FILE="$LOG_DIR/${server//:/_}.log"
        echo "[$server] 执行中..."
        # -n: ssh must not read stdin, which is the server list feeding this loop.
        if ssh -n -o ConnectTimeout=10 -o BatchMode=yes "${port_opt[@]}" \
            "$target" "$COMMAND" > "$LOG_FILE" 2>&1; then
            echo "[$server] ✅ 成功"
        else
            echo "[$server] ❌ 失败 (详见 $LOG_FILE)"
        fi
    ) &

    # Throttle: wait until fewer than MAX_PARALLEL jobs are running.
    while (( $(jobs -r | wc -l) >= MAX_PARALLEL )); do
        sleep 0.5
    done
done < "$SERVERS_FILE"

wait
echo ""
echo "=== 执行完成 ==="

servers.txt 格式:

# servers.txt
# Format: user@host:port or user@host
deploy@web01:22
deploy@web02:22
deploy@db01:22
# Monitoring server
monitor@mon01:22

健康检查脚本

#!/bin/bash
# health-check.sh - print a one-line health summary (uptime, load averages,
# root filesystem usage) for each server in SERVERS.

SERVERS="web01 web02 web03 db01 db02 cache01"
DATE=$(date '+%Y-%m-%d %H:%M:%S')

# Executed on the remote host. It prints exactly three lines - uptime, the
# three load averages, and the usage percentage of / - so the result can be
# split by line instead of by a regex that depends on how many words
# `uptime -p` happens to print. Single-quoted: nothing is expanded locally.
REMOTE_CMD='echo "$(uptime -p)"; cut -d" " -f1-3 /proc/loadavg; df -h / | tail -1 | awk "{print \$5}"'

echo "=== 服务器健康检查 ($DATE) ==="
printf "%-15s %-8s %-10s %-20s %-10s\n" "Server" "Status" "Uptime" "Load" "Disk"
printf "%-15s %-8s %-10s %-20s %-10s\n" "-------" "------" "------" "----" "----"

# SERVERS is deliberately left unquoted: it is a space-separated list.
for server in $SERVERS; do
    if RESULT=$(ssh -o ConnectTimeout=5 -o BatchMode=yes "deploy@$server" \
        "$REMOTE_CMD" 2>/dev/null); then
        # One field per line, in the order REMOTE_CMD printed them.
        {
            IFS= read -r UPTIME
            IFS= read -r LOAD
            IFS= read -r DISK
        } <<< "$RESULT"
        printf "%-15s %-8s %-10s %-20s %-10s\n" "$server" "✅ OK" "$UPTIME" "$LOAD" "$DISK"
    else
        printf "%-15s %-8s %-10s %-20s %-10s\n" "$server" "❌ FAIL" "-" "-" "-"
    fi
done

日志收集脚本

#!/bin/bash
# collect-logs.sh - copy syslog, the application logs and the last 1000 lines
# of auth.log from each server into ./collected_logs/<date>/<server>/.

SERVERS="web01 web02 web03"
DATE=$(date +%Y%m%d)
LOG_DIR="./collected_logs/$DATE"

mkdir -p "$LOG_DIR" || exit 1

# SERVERS is deliberately left unquoted: it is a space-separated list.
for server in $SERVERS; do
    echo "收集 $server 日志..."

    dest="$LOG_DIR/$server"
    mkdir -p "$dest"
    errors=0

    # syslog
    scp "deploy@$server:/var/log/syslog" "$dest/" 2>/dev/null || errors=$((errors + 1))

    # Application logs. Quoted so that the *.log pattern is never expanded
    # against local files.
    scp "deploy@$server:/var/log/app/*.log" "$dest/" 2>/dev/null || errors=$((errors + 1))

    # Last 1000 lines of auth.log
    if ! ssh "deploy@$server" "tail -n 1000 /var/log/auth.log" > "$dest/auth.log" 2>/dev/null; then
        errors=$((errors + 1))
        # Do not leave an empty file behind that looks like a collected log.
        [[ -s "$dest/auth.log" ]] || rm -f -- "$dest/auth.log"
    fi

    # Report success only when every step actually worked.
    if (( errors == 0 )); then
        echo "  ✅ $server 完成"
    else
        echo "  ⚠️ $server 完成,但有 $errors 项收集失败" >&2
    fi
done

echo ""
echo "日志已收集到 $LOG_DIR"

12.5 Ansible 集成

Ansible SSH 配置

# ansible.cfg
[defaults]
inventory = ./inventory
remote_user = deploy
private_key_file = ~/.ssh/id_ansible
# Keep host key verification enabled; how unknown hosts are treated is
# decided by StrictHostKeyChecking in ssh_args below.
host_key_checking = True
timeout = 30
forks = 20

[ssh_connection]
# Reuse one SSH connection per host for 60 seconds. accept-new trusts a host
# key on first contact but refuses the connection if the key later changes.
ssh_args = -o ControlMaster=auto -o ControlPersist=60s -o StrictHostKeyChecking=accept-new
pipelining = True
control_path_dir = ~/.ssh/ansible-cp

Inventory 配置

# inventory/hosts
[webservers]
web01 ansible_host=10.0.0.10
web02 ansible_host=10.0.0.11
web03 ansible_host=10.0.0.12

[dbservers]
# ansible_port is the SSH port Ansible connects to, not the database port.
# NOTE(review): 2222 is an example value - set it to the host's real SSH port.
db01 ansible_host=10.0.0.20 ansible_port=2222
db02 ansible_host=10.0.0.21

[all:vars]
ansible_user=deploy
ansible_ssh_private_key_file=~/.ssh/id_ansible
ansible_python_interpreter=/usr/bin/python3

# Hosts reached through the jump host. A [group:vars] section is only valid
# for a group that is defined, so the group is declared here.
# NOTE(review): dbservers is an example member - list the groups that really
# sit behind the bastion.
[internal:children]
dbservers

[internal:vars]
ansible_ssh_common_args='-o ProxyJump=jumpuser@bastion.example.com'

基本 Ansible 命令

# Test connectivity
ansible all -m ping

# Run a command (the command module is enough when no shell features are used)
ansible webservers -m command -a "uptime"

# Copy a file (--become: writing under /etc requires root)
ansible webservers --become -m copy -a "src=./app.conf dest=/etc/app/app.conf mode=0644"

# Manage a service (--become: restarting a system service requires root)
ansible webservers --become -m systemd -a "name=nginx state=restarted"

# Gather system facts
ansible all -m setup -a "filter=ansible_distribution*"

Ansible Playbook 示例

# deploy.yml
# Checks out a tagged release, installs its dependencies, renders the
# configuration and keeps the service running. The handler restarts the
# service when the code or the configuration changed.
---
- name: Deploy Application
  hosts: webservers
  become: true
  vars:
    app_dir: /opt/app
    app_version: "1.2.3"

  handlers:
    - name: restart app
      systemd:
        state: restarted
        name: app

  tasks:
    - name: Update application code
      notify: restart app
      git:
        version: "v{{ app_version }}"
        dest: "{{ app_dir }}"
        repo: git@github.com:company/app.git

    - name: Install dependencies
      apt:
        state: present
        name: [python3, python3-pip]

    - name: Copy configuration
      notify: restart app
      template:
        mode: '0644'
        dest: /etc/app/app.conf
        src: app.conf.j2

    - name: Ensure service is running
      systemd:
        enabled: true
        state: started
        name: app

Ansible 密钥管理

# key-management.yml
# Installs the current public keys for the deploy user, removes a retired
# key and, when rotate_keys is set, generates a fresh key pair.
---
- name: Manage SSH Keys
  hosts: all
  become: true

  tasks:
    - name: Deploy admin keys
      authorized_key:
        user: "{{ item.user }}"
        # The file lookup reads the public key on the control node.
        key: "{{ lookup('file', item.key) }}"
        state: present
      loop:
        - { user: 'deploy', key: '~/.ssh/id_ansible.pub' }
        - { user: 'deploy', key: '~/.ssh/id_admin.pub' }
      when: ansible_user_id == 'root'

    - name: Remove old keys
      authorized_key:
        user: deploy
        key: "{{ lookup('file', 'keys/old_key.pub') }}"
        state: absent

    - name: Rotate keys
      user:
        name: deploy
        generate_ssh_key: true
        # Without force an existing key file is left untouched, so nothing
        # would actually be rotated.
        force: true
        ssh_key_type: ed25519
        ssh_key_file: .ssh/id_ed25519
        ssh_key_passphrase: "{{ vault_key_passphrase }}"
      # "| bool" so that a string such as -e rotate_keys=false is not
      # treated as true.
      when: rotate_keys | default(false) | bool

Ansible Vault 安全存储

# Create an encrypted variables file
ansible-vault create secrets.yml

# Edit the encrypted file
ansible-vault edit secrets.yml

# Supply the vault password interactively when running a playbook
ansible-playbook deploy.yml --ask-vault-pass

# Read the vault password from a file
ansible-playbook deploy.yml --vault-password-file ~/.vault_pass
# secrets.yml (stored encrypted)
# NOTE(review): each value refers to a vault_* variable that is not defined
# in this snippet - confirm where those variables are declared.
ansible_ssh_pass: "{{ vault_ssh_pass }}"
ansible_become_pass: "{{ vault_sudo_pass }}"
api_key: "{{ vault_api_key }}"

12.6 SSH 在 CI/CD 中的使用

GitHub Actions

# .github/workflows/deploy.yml
name: Deploy
on:
  push:
    branches: [main]

jobs:
  deploy:
    runs-on: ubuntu-latest
    env:
      # Hand the host to the shell as an environment variable instead of
      # expanding the expression inside the script text, so the value is
      # never parsed as shell code.
      SERVER_HOST: ${{ secrets.SERVER_HOST }}
    steps:
      - uses: actions/checkout@v4

      - name: Setup SSH
        uses: webfactory/ssh-agent@v0.9.0
        with:
          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}

      - name: Add known hosts
        # ssh-keyscan trusts whatever answers at run time; storing the
        # expected host key as a secret and writing that is stricter.
        run: |
          mkdir -p ~/.ssh
          ssh-keyscan -H "$SERVER_HOST" >> ~/.ssh/known_hosts

      - name: Deploy
        run: |
          ssh "deploy@$SERVER_HOST" 'cd /opt/app && git pull && docker-compose up -d'

GitLab CI

# .gitlab-ci.yml
# SSH_PRIVATE_KEY, SSH_KNOWN_HOSTS and SERVER_HOST are CI/CD variables.
deploy:
  stage: deploy
  image: alpine:latest
  before_script:
    - apk add --no-cache openssh-client
    - eval $(ssh-agent -s)
    # Strip carriage returns that a key pasted into the variable may carry.
    - echo "$SSH_PRIVATE_KEY" | tr -d '\r' | ssh-add -
    - mkdir -p ~/.ssh
    - chmod 700 ~/.ssh
    - echo "$SSH_KNOWN_HOSTS" > ~/.ssh/known_hosts
    - chmod 644 ~/.ssh/known_hosts
  script:
    - ssh "deploy@$SERVER_HOST" 'cd /opt/app && git pull && systemctl restart app'
  only:
    - main

Jenkins Pipeline

// Jenkinsfile
// Deploys over SSH. The sshagent step loads the 'ssh-deploy-key' credential
// into an agent for the duration of the block, so the key does not need to
// be bound to an environment variable as well.
pipeline {
    agent any

    stages {
        stage('Deploy') {
            steps {
                sshagent(credentials: ['ssh-deploy-key']) {
                    // accept-new: no prompt for an unknown host, but a
                    // changed host key is still refused.
                    sh '''
                        ssh -o StrictHostKeyChecking=accept-new deploy@server \
                            "cd /opt/app && git pull && docker-compose up -d"
                    '''
                }
            }
        }
    }
}

12.7 使用场景

场景一:批量更新服务器

#!/bin/bash
# batch-update.sh - upgrade the packages on every server in servers.txt,
# reboot them, then check which ones are reachable again.
#
# servers.txt: one target per line - "host", "user@host" or "user@host:port".
#              Blank lines and lines starting with '#' are skipped.

SERVERS_FILE="servers.txt"
DEFAULT_USER="deploy"
UPDATE_CMD="sudo apt update && sudo apt upgrade -y && sudo reboot"

if [[ ! -r "$SERVERS_FILE" ]]; then
    echo "Cannot read $SERVERS_FILE" >&2
    exit 1
fi

# Read the list line by line so that comment lines are never split into
# words and mistaken for host names.
SERVERS=()
while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}"    # tolerate CRLF line endings
    [[ -z "$line" || "$line" == \#* ]] && continue
    SERVERS+=("$line")
done < "$SERVERS_FILE"

#######################################
# Run a command on one servers.txt entry.
# Arguments: $1 - entry (host, user@host or user@host:port)
#            $2 - command to run remotely
#            $3.. - extra ssh options
# Returns:   the exit status of ssh
#######################################
run_remote() {
    local entry=$1 cmd=$2
    shift 2
    local -a opts=(-n -o BatchMode=yes "$@")
    # ssh does not parse "host:port"; pass a trailing numeric port with -p.
    if [[ "$entry" =~ ^([^:]+):([0-9]+)$ ]]; then
        entry=${BASH_REMATCH[1]}
        opts+=(-p "${BASH_REMATCH[2]}")
    fi
    # Entries without an explicit user log in as DEFAULT_USER.
    [[ "$entry" == *@* ]] || entry="$DEFAULT_USER@$entry"
    ssh "${opts[@]}" "$entry" "$cmd"
}

for server in "${SERVERS[@]}"; do
    echo "Updating $server..."
    run_remote "$server" "$UPDATE_CMD" &
done

# The reboot drops each connection, so the exit status of these jobs cannot
# tell a successful update from a failed one; the check below is the real test.
wait
echo "All servers updated. Waiting for reboot..."
sleep 60

for server in "${SERVERS[@]}"; do
    echo "Checking $server..."
    run_remote "$server" "uptime" -o ConnectTimeout=10 || echo "$server not yet available"
done

场景二:紧急安全补丁

#!/bin/bash
# emergency-patch.sh - upgrade openssh-server on every server in servers.txt,
# at most MAX_PARALLEL hosts at a time, and report the result per host.
#
# servers.txt: one target per line - "host", "user@host" or "user@host:port".
#              Blank lines and lines starting with '#' are skipped.

SERVERS_FILE="servers.txt"
DEFAULT_USER="deploy"
MAX_PARALLEL=10
PATCH="sudo apt update && sudo apt install -y --only-upgrade openssh-server"

if [[ ! -r "$SERVERS_FILE" ]]; then
    echo "Cannot read $SERVERS_FILE" >&2
    exit 1
fi

# Read the list line by line so that comment lines are never split into
# words and mistaken for host names.
SERVERS=()
while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line%$'\r'}"    # tolerate CRLF line endings
    [[ -z "$line" || "$line" == \#* ]] && continue
    SERVERS+=("$line")
done < "$SERVERS_FILE"

#######################################
# Run a command on one servers.txt entry.
# Arguments: $1 - entry (host, user@host or user@host:port)
#            $2 - command to run remotely
# Returns:   the exit status of ssh
#######################################
run_remote() {
    local entry=$1 cmd=$2
    local -a opts=(-n -o BatchMode=yes)
    # ssh does not parse "host:port"; pass a trailing numeric port with -p.
    if [[ "$entry" =~ ^([^:]+):([0-9]+)$ ]]; then
        entry=${BASH_REMATCH[1]}
        opts+=(-p "${BASH_REMATCH[2]}")
    fi
    # Entries without an explicit user log in as DEFAULT_USER.
    [[ "$entry" == *@* ]] || entry="$DEFAULT_USER@$entry"
    ssh "${opts[@]}" "$entry" "$cmd"
}

echo "=== 紧急安全补丁部署 ==="
# Count real targets only; comment and blank lines are not servers.
echo "目标: ${#SERVERS[@]} 台服务器"
echo ""

for server in "${SERVERS[@]}"; do
    (
        if RESULT=$(run_remote "$server" "$PATCH" 2>&1); then
            echo "[$server] ✅ 补丁已安装"
        else
            echo "[$server] ❌ 失败: $RESULT"
        fi
    ) &

    # Throttle: wait until fewer than MAX_PARALLEL jobs are running.
    while (( $(jobs -r | wc -l) >= MAX_PARALLEL )); do
        sleep 0.5
    done
done

wait
echo ""
echo "=== 补丁部署完成 ==="

扩展阅读


下一章: 第13章 Docker 中的 SSH → 学习 Docker 容器中的 SSH 使用和安全考量。