强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

Nginx 从入门到精通 / 09 - 日志与监控 / Logging & Monitoring

日志与监控 / Logging & Monitoring

🟢 基础 / Basics — 日志配置

access_log 和 error_log

http {
    # 访问日志
    access_log /var/log/nginx/access.log;

    # 错误日志(级别:debug, info, notice, warn, error, crit, alert, emerg)
    error_log /var/log/nginx/error.log warn;

    server {
        listen 80;
        server_name example.com;

        # 可以为特定 location 关闭日志
        location /health {
            access_log off;
            return 200 "OK";
        }

        # 可以为特定 server 使用不同日志文件
        access_log /var/log/nginx/example.com.log;
    }
}

日志级别

debug    → 最详细(需要 --with-debug 编译)
info     → 一般信息
notice   → 正常但值得注意
warn     → 警告
error    → 错误(默认)
crit     → 严重错误
alert    → 需要立即处理
emerg    → 系统不可用
# 生产环境推荐
error_log /var/log/nginx/error.log warn;

# 调试时临时开启
error_log /var/log/nginx/error.log debug;
# 注意:debug 日志量极大,用完记得关闭

自定义日志格式

http {
    # 默认格式(combined)
    # log_format combined '$remote_addr - $remote_user [$time_local] '
    #                     '"$request" $status $body_bytes_sent '
    #                     '"$http_referer" "$http_user_agent"';

    # 自定义格式:包含更多有用信息
    log_format main '$remote_addr - $remote_user [$time_local] '
                    '"$request" $status $body_bytes_sent '
                    '"$http_referer" "$http_user_agent" '
                    'rt=$request_time '              # 请求总耗时
                    'urt=$upstream_response_time '   # 后端响应时间
                    'uct=$upstream_connect_time '    # 后端连接时间
                    'uaddr=$upstream_addr '          # 后端地址
                    'cache=$upstream_cache_status '  # 缓存状态
                    'host=$host '
                    'proto=$scheme';

    # JSON 格式(便于 ELK/Loki 解析)
    log_format json_log escape=json
        '{'
            '"time":"$time_iso8601",'
            '"remote_addr":"$remote_addr",'
            '"request":"$request",'
            '"status":$status,'
            '"body_bytes_sent":$body_bytes_sent,'
            '"request_time":$request_time,'
            '"upstream_response_time":"$upstream_response_time",'
            '"http_referer":"$http_referer",'
            '"http_user_agent":"$http_user_agent",'
            '"upstream_cache_status":"$upstream_cache_status"'
        '}';

    access_log /var/log/nginx/access.log main;
    # 或
    # access_log /var/log/nginx/access.json json_log;
}

🟡 进阶 / Intermediate — 日志管理与分析

日志轮转 / Log Rotation

# /etc/logrotate.d/nginx
/var/log/nginx/*.log {
    # 注意:logrotate 的注释应单独成行,写在指令后面可能被当作参数导致解析报错
    # 每天轮转
    daily
    # 日志不存在不报错
    missingok
    # 保留 30 份(每天轮转即约 30 天)
    rotate 30
    # 压缩旧日志
    compress
    # 延迟到下一次轮转时再压缩(最近一份保持未压缩)
    delaycompress
    # 空日志不轮转
    notifempty
    # 轮转后按指定权限和属主新建日志文件
    create 0640 www-data adm
    # 多个日志文件只执行一次脚本
    sharedscripts
    postrotate
        # 通知 Nginx 重新打开日志文件
        # 使用 if 而不是 &&:PID 文件不存在时脚本仍返回 0,避免 logrotate 报脚本执行错误
        if [ -f /var/run/nginx.pid ]; then
            kill -USR1 "$(cat /var/run/nginx.pid)"
        fi
    endscript
}
# 手动触发日志切割
sudo logrotate -f /etc/logrotate.d/nginx

# 不用 logrotate 时,手动切割:
sudo mv /var/log/nginx/access.log /var/log/nginx/access.log.$(date +%Y%m%d)
sudo kill -USR1 $(cat /var/run/nginx.pid)   # Nginx 重新打开日志文件

条件日志

# 只记录非健康检查的请求
map $request_uri $loggable {
    /health   0;
    /ping     0;
    default   1;
}

server {
    access_log /var/log/nginx/access.log main if=$loggable;
}

# 只记录错误请求
map $status $log_error {
    ~^[23]  0;    # 2xx, 3xx 不记录
    default 1;    # 其他都记录
}

access_log /var/log/nginx/error-requests.log main if=$log_error;

# 只记录慢请求
map $request_time $log_slow {
    default 0;
    "~^[1-9]"  1;   # 请求时间 >= 1 秒
}

access_log /var/log/nginx/slow-requests.log main if=$log_slow;

stub_status(内置状态页)

server {
    listen 80;
    server_name localhost;

    location /nginx_status {
        stub_status on;
        allow 127.0.0.1;      # 只允许本机访问
        allow 10.0.0.0/8;     # 允许内网
        deny all;
    }
}
输出示例:
Active connections: 291
server accepts handled requests
 16630948 16630948 31070465
Reading: 6 Writing: 179 Waiting: 106

解读:
- Active connections: 当前活跃连接数
- accepts: 已接受的总连接数
- handled: 已处理的总连接数(等于 accepts 表示没有丢弃)
- requests: 已处理的总请求数(一个连接可以处理多个请求)
- Reading: 正在读取请求头的连接数
- Writing: 正在发送响应的连接数
- Waiting: Keep-alive 等待中的连接数

日志分析实战

# TOP 10 访问 IP
awk '{print $1}' /var/log/nginx/access.log | sort | uniq -c | sort -rn | head -10

# TOP 10 请求路径
awk '{print $7}' /var/log/nginx/access.log | sort | uniq -c | sort -rn | head -10

# 统计 HTTP 状态码分布
awk '{print $9}' /var/log/nginx/access.log | sort | uniq -c | sort -rn

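# 统计慢请求(响应时间 > 1s;要求日志使用带 rt= 字段的 main 格式)
# 只匹配以 "rt=" 开头的字段,避免 URL 中的 sort=,start= 或 urt= 被误当作分隔符
awk '{for (i = 1; i <= NF; i++) if ($i ~ /^rt=/ && substr($i, 4) + 0 > 1.0) n++} END {print n + 0}' /var/log/nginx/access.log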

# 统计每小时请求量
awk '{print $4}' /var/log/nginx/access.log | cut -d: -f1-2 | uniq -c

# 统计带宽使用
awk '{sum += $10} END {printf "%.2f GB\n", sum/1024/1024/1024}' /var/log/nginx/access.log

🔴 高级 / Advanced — 监控与告警

Prometheus + Grafana 监控

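# 使用 nginx-prometheus-exporter
# 1. 启用 stub_status(放在监听 8080 的 server 块中,与 exporter 的抓取地址保持一致)
location /nginx_status {
    stub_status on;
    allow 127.0.0.1;
    allow 172.16.0.0/12;   # Docker 默认网段;exporter 容器的请求来源不是 127.0.0.1
    deny all;
}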
# docker-compose.yml(Prometheus + Grafana + Nginx Exporter)
version: "3.8"
services:
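  nginx-exporter:
    image: nginx/nginx-prometheus-exporter:latest
    # 参数名是 nginx.scrape-uri;1.x 版本的 exporter 需要使用双横线
    command: ["--nginx.scrape-uri=http://host.docker.internal:8080/nginx_status"]
    # Linux 上需要显式把 host.docker.internal 映射到宿主机
    extra_hosts:
      - "host.docker.internal:host-gateway"
    ports:
      - "9113:9113"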

  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
# prometheus.yml
scrape_configs:
  - job_name: "nginx"
    static_configs:
      - targets: ["nginx-exporter:9113"]

Nginx Plus 内置 API 监控

# Nginx Plus 专用(商业版)
server {
    listen 8080;
    location /api/ {
        api write=on;
        # 返回 JSON 格式的实时状态数据
    }
    location /dashboard.html {
        root /usr/share/nginx/html;
    }
}

实时日志流

# 实时查看访问日志
tail -f /var/log/nginx/access.log

# 实时统计请求速率
tail -f /var/log/nginx/access.log | pv -l -i 2 -r > /dev/null

# 实时查看错误日志
tail -f /var/log/nginx/error.log | grep --line-buffered "error\|warn"

# GoAccess 生成 HTML 报告(加上 --real-time-html 可让页面实时刷新)
goaccess /var/log/nginx/access.log --log-format=COMBINED -o /var/www/html/report.html
# 或终端仪表盘(不指定 -o 时在终端内实时刷新;-c 表示启动时弹出日志/日期格式配置窗口)
goaccess /var/log/nginx/access.log --log-format=COMBINED -c

ELK / Loki 集成

# 输出 JSON 日志供 ELK/Loki 解析
log_format json_log escape=json
    '{'
        '"@timestamp":"$time_iso8601",'
        '"server":"$server_name",'
        '"remote_addr":"$remote_addr",'
        '"request":"$request",'
        '"method":"$request_method",'
        '"uri":"$uri",'
        '"args":"$args",'
        '"status":$status,'
        '"bytes":$body_bytes_sent,'
        '"request_time":$request_time,'
        '"upstream_time":"$upstream_response_time",'
        '"upstream_addr":"$upstream_addr",'
        '"cache":"$upstream_cache_status",'
        '"referer":"$http_referer",'
        '"user_agent":"$http_user_agent"'
    '}';

access_log /var/log/nginx/access.json json_log;
# Loki + Promtail 配置片段
scrape_configs:
  - job_name: nginx
    static_configs:
      - targets: [localhost]
        labels:
          job: nginx
          # 指向上面配置的 JSON 访问日志;*.log 匹配不到 access.json,且 error.log 不是 JSON
          __path__: /var/log/nginx/access.json
    pipeline_stages:
      - json:
          expressions:
            status: status
            request_time: request_time
      - labels:
          status:

小结 / Summary

层级 | 你需要知道的 / What You Need to Know
🟢 基础 | access_log,error_log,自定义 log_format
🟡 进阶 | 日志轮转,条件日志,stub_status,日志分析命令
🔴 高级 | Prometheus 监控,JSON 日志,ELK/Loki 集成,GoAccess

下一章:安全加固 / Security Hardening