AWK & SED 生产力教程 / 第 12 章:报告生成
第 12 章:报告生成
一份好的报告,能让数据说话。AWK 和 SED 是生成结构化报告的利器。
12.1 文本报告基础
表格格式化
# Create the sample data set used by the examples in this chapter.
# Columns (space-separated): name, department, salary, bonus.
printf '%s\n' \
  'Alice Engineering 15000 2000' \
  'Bob Marketing 12000 1500' \
  'Carol Engineering 16000 2500' \
  'Dave Sales 11000 1000' \
  'Eve Engineering 14000 1800' \
  > data.txt
# Plain table: the two text columns are left-aligned, the two numeric columns
# right-aligned.
$ awk '
    BEGIN { row_fmt = "%-12s %-15s %10d %10d\n" }
    { printf(row_fmt, $1, $2, $3, $4) }
' data.txt
# Table with a header row and a separator line.
# The header labels are CJK text: each glyph occupies two terminal columns,
# but printf widths count characters (or bytes, depending on the awk), so a
# plain "%-12s" leaves the header out of line with the data rows. The cell
# helpers pad by an explicitly supplied display width instead.
$ awk '
    # Left-/right-justify label s (display width w) within cols columns.
    function lcell(s, w, cols) { return s sprintf("%" (cols - w) "s", "") }
    function rcell(s, w, cols) { return sprintf("%" (cols - w) "s", "") s }

    BEGIN {
        # print joins its arguments with a single space (the default OFS),
        # matching the single spaces between columns in the data rows.
        print lcell("姓名", 4, 12), lcell("部门", 4, 15), rcell("薪资", 4, 10), rcell("奖金", 4, 10)
        printf "%-12s %-15s %10s %10s\n", "----------", "-------------", "----------", "----------"
    }
    {
        printf "%-12s %-15s %10d %10d\n", $1, $2, $3, $4
    }
' data.txt
Unicode 表格
# Box-drawing table. The header labels are CJK text, which occupies two
# terminal columns per glyph, while printf widths count characters (or bytes,
# depending on the awk). The cell helpers therefore take the display width
# explicitly so the header cells line up with the borders.
$ awk '
    # Left-/right-justify label s (display width w) within cols columns.
    function lcell(s, w, cols) { return s sprintf("%" (cols - w) "s", "") }
    function rcell(s, w, cols) { return sprintf("%" (cols - w) "s", "") s }

    BEGIN {
        print "┌────────────┬───────────────┬──────────┬──────────┐"
        printf "│ %s │ %s │ %s │ %s │\n", lcell("姓名", 4, 10), lcell("部门", 4, 13), rcell("薪资", 4, 8), rcell("奖金", 4, 8)
        print "├────────────┼───────────────┼──────────┼──────────┤"
    }
    {
        printf "│ %-10s │ %-13s │ %8d │ %8d │\n", $1, $2, $3, $4
    }
    END {
        print "└────────────┴───────────────┴──────────┴──────────┘"
    }
' data.txt
汇总统计表
# Per-department summary: head count, salary total, bonus total and average
# salary, followed by a grand-total row.
# CJK labels are padded by explicit display width (two columns per glyph)
# because printf widths count characters or bytes, not terminal columns.
$ awk '
    # Left-/right-justify label s (display width w) within cols columns.
    function lcell(s, w, cols) { return s sprintf("%" (cols - w) "s", "") }
    function rcell(s, w, cols) { return sprintf("%" (cols - w) "s", "") s }

    # Ignore blank lines so they do not show up as an unnamed department.
    NF == 0 { next }

    {
        dept_count[$2]++
        dept_salary[$2] += $3
        dept_bonus[$2] += $4
    }

    END {
        rule = sprintf("%-15s %6s %12s %12s %12s", "===============", "======", "============", "============", "============")

        print lcell("部门", 4, 15), rcell("人数", 4, 6), rcell("总薪资", 6, 12), rcell("总奖金", 6, 12), rcell("平均薪资", 8, 12)
        print rule

        total_count = total_salary = total_bonus = 0
        for (d in dept_count) {
            printf "%-15s %6d %12d %12d %12.0f\n", d, dept_count[d], dept_salary[d], dept_bonus[d], dept_salary[d] / dept_count[d]
            total_count += dept_count[d]
            total_salary += dept_salary[d]
            total_bonus += dept_bonus[d]
        }
        print rule

        # With no input rows there is nothing to average; report 0 instead of
        # aborting on a division by zero.
        overall_avg = (total_count > 0) ? total_salary / total_count : 0
        printf "%s %6d %12d %12d %12.0f\n", lcell("合计", 4, 15), total_count, total_salary, total_bonus, overall_avg
    }
' data.txt
12.2 CSV 报告生成
生成 CSV 文件
# Convert the whitespace-separated data to CSV, appending salary + bonus as a
# fifth column.
$ awk '{ income = $3 + $4; printf("%s,%s,%d,%d,%d\n", $1, $2, $3, $4, income) }' data.txt > report.csv
# Same conversion, preceded by a header row.
$ awk '
    BEGIN { print "姓名,部门,薪资,奖金,总收入" }
    {
        income = $3 + $4
        printf("%s,%s,%d,%d,%d\n", $1, $2, $3, $4, income)
    }
' data.txt > report.csv
从现有数据生成 CSV
# Build a CSV from df output (header line skipped).
# -P asks for the POSIX output format; with GNU df this guarantees one line
# per filesystem, whereas the default format may move a long device name onto
# a line of its own and shift every field.
$ df -Ph | awk 'NR > 1 {
    gsub(/%/, "", $5)                      # "42%" -> "42"
    # A mount point may contain spaces; rejoin everything from field 6 on
    # instead of keeping only its first word.
    mount = $6
    for (i = 7; i <= NF; i++) mount = mount " " $i
    printf "%s,%s,%s,%s,%s,%s\n", $1, $2, $3, $4, $5, mount
}' > disk_report.csv
# Build a CSV from ps output: fields 1-4 and field 11 of every line after the
# header (in the usual `ps aux` layout these are user, PID, %CPU, %MEM and the
# first word of the command - verify on your platform).
$ ps aux | awk -v OFS=',' 'NR == 1 { next } { print $1, $2, $3, $4, $11 }' > process_report.csv
CSV 格式化
# 添加 CSV 表头
$ echo "Name,Department,Salary" | cat - data.csv > report_with_header.csv
# CSV 列对齐显示
$ column -t -s, report.csv
# Sort a CSV by column 3 (numeric, descending), keeping the header row on top.
$ head -n 1 report.csv > sorted_report.csv
# -k3,3 ends the sort key at column 3. A bare -k3 runs the key to the end of
# the line, so under -n the commas that follow can be taken for thousands
# separators in locales that use them, producing a wrong order.
$ tail -n +2 report.csv | sort -t, -k3,3 -rn >> sorted_report.csv
12.3 HTML 报告生成
基本 HTML 表格
# Render data.txt as a standalone HTML page containing one table.
# The timestamp is produced by date(1) and passed in with -v, because
# strftime() is an awk extension that is not available in every implementation.
$ awk -v now="$(date '+%Y-%m-%d %H:%M:%S')" '
    # Escape the characters that are special in HTML text.
    function esc(s) {
        gsub(/&/, "\\&amp;", s)
        gsub(/</, "\\&lt;", s)
        gsub(/>/, "\\&gt;", s)
        return s
    }

    BEGIN {
        print "<!DOCTYPE html>"
        print "<html>"
        print "<head>"
        # Declare the encoding so the non-ASCII headings render correctly.
        print "<meta charset=\"UTF-8\">"
        print "<style>"
        print " body { font-family: Arial, sans-serif; margin: 20px; }"
        print " table { border-collapse: collapse; width: 80%; margin: 20px auto; }"
        print " th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }"
        print " th { background-color: #4CAF50; color: white; }"
        print " tr:nth-child(even) { background-color: #f2f2f2; }"
        print " .right { text-align: right; }"
        print "</style>"
        print "</head>"
        print "<body>"
        print "<h1 style=\"text-align:center\">员工薪资报告</h1>"
        print "<table>"
        print "<tr><th>姓名</th><th>部门</th><th class=\"right\">薪资</th><th class=\"right\">奖金</th><th class=\"right\">总收入</th></tr>"
    }

    # One table row per input record; the last cell is salary + bonus.
    {
        printf "<tr><td>%s</td><td>%s</td><td class=\"right\">%d</td><td class=\"right\">%d</td><td class=\"right\">%d</td></tr>\n", esc($1), esc($2), $3, $4, $3 + $4
    }

    END {
        print "</table>"
        print "<p style=\"text-align:center; color: #666;\">生成时间: " now "</p>"
        print "</body>"
        print "</html>"
    }
' data.txt > report.html
带统计的 HTML 报告
# Write the awk program that renders the HTML report: summary cards, a detail
# table and per-department statistics.
cat > generate_report.awk << 'EOF'
# generate_report.awk - build an HTML salary report.
# Input:  whitespace-separated records: name, department, salary, bonus.
# Usage:  awk -f generate_report.awk data.txt > report.html

# Escape the characters that are special in HTML text.
function html_escape(s) {
    gsub(/&/, "\\&amp;", s)
    gsub(/</, "\\&lt;", s)
    gsub(/>/, "\\&gt;", s)
    return s
}

BEGIN {
    print "<!DOCTYPE html>"
    print "<html><head>"
    print "<meta charset=\"UTF-8\">"
    print "<style>"
    print " body { font-family: 'Segoe UI', Arial, sans-serif; margin: 40px; background: #f5f5f5; }"
    print " .container { max-width: 900px; margin: 0 auto; background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }"
    print " h1 { color: #2c3e50; text-align: center; }"
    print " table { width: 100%; border-collapse: collapse; margin: 20px 0; }"
    print " th { background: #3498db; color: white; padding: 12px; }"
    print " td { padding: 10px; border-bottom: 1px solid #ddd; }"
    print " tr:hover { background: #f5f5f5; }"
    print " .summary { display: flex; justify-content: space-around; margin: 20px 0; }"
    print " .stat-box { text-align: center; padding: 20px; background: #ecf0f1; border-radius: 8px; }"
    print " .stat-number { font-size: 24px; font-weight: bold; color: #2c3e50; }"
    print " .stat-label { color: #7f8c8d; }"
    print "</style></head><body>"
    print "<div class=\"container\">"
    print "<h1>📊 数据分析报告</h1>"
}

# Skip blank lines so they are not counted as employees.
NF == 0 { next }

# Keep every record for the detail table and accumulate the statistics.
{
    total_count++
    data[total_count] = $0
    # Count distinct departments by hand; length(array) is not available in
    # every awk.
    if (!($2 in dept_count)) dept_total++
    dept_count[$2]++
    dept_salary[$2] += $3
    total_salary += $3
}

END {
    # With no input rows there is nothing to average; show 0 instead of
    # aborting on a division by zero.
    avg_salary = (total_count > 0) ? total_salary / total_count : 0

    # Summary cards
    printf "<div class=\"summary\">"
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%d</div><div class=\"stat-label\">总人数</div></div>", total_count
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%d</div><div class=\"stat-label\">总薪资</div></div>", total_salary
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%.0f</div><div class=\"stat-label\">平均薪资</div></div>", avg_salary
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%d</div><div class=\"stat-label\">部门数</div></div>", dept_total
    printf "</div>"

    # Detail table: one row per stored record, last cell is salary + bonus.
    print "<table>"
    print "<tr><th>姓名</th><th>部门</th><th>薪资</th><th>奖金</th><th>总收入</th></tr>"
    for (i = 1; i <= total_count; i++) {
        split(data[i], a, " ")
        printf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>\n", html_escape(a[1]), html_escape(a[2]), a[3], a[4], a[3] + a[4]
    }
    print "</table>"

    # Per-department statistics
    print "<h2>部门统计</h2>"
    print "<table>"
    print "<tr><th>部门</th><th>人数</th><th>平均薪资</th></tr>"
    for (d in dept_count)
        printf "<tr><td>%s</td><td>%d</td><td>%.0f</td></tr>\n", html_escape(d), dept_count[d], dept_salary[d] / dept_count[d]
    print "</table>"
    print "</div></body></html>"
}
EOF
$ awk -f generate_report.awk data.txt > report.html
12.4 Markdown 报告
# Write a Markdown report: a detail table followed by per-department averages.
# The timestamp is produced by date(1) and passed in with -v, because
# strftime() is an awk extension that is not available in every implementation.
$ awk -v now="$(date '+%Y-%m-%d %H:%M:%S')" '
    BEGIN {
        print "# 员工薪资报告"
        print ""
        print "生成时间: " now
        print ""
        print "## 详细数据"
        print ""
        printf "| %-10s | %-13s | %8s | %8s | %8s |\n", "姓名", "部门", "薪资", "奖金", "总收入"
        printf "|%-11s|%-14s|%-9s|%-9s|%-9s|\n", "-----------", "--------------", "---------", "---------", "---------"
    }

    # Emit one table row per record and accumulate the department totals.
    {
        printf "| %-10s | %-13s | %8d | %8d | %8d |\n", $1, $2, $3, $4, $3 + $4
        dept_salary[$2] += $3
        dept_count[$2]++
    }

    END {
        print ""
        print "## 部门统计"
        print ""
        printf "| %-13s | %6s | %12s |\n", "部门", "人数", "平均薪资"
        printf "|%-14s|%-7s|%-13s|\n", "--------------", "-------", "-------------"
        for (d in dept_count)
            printf "| %-13s | %6d | %12.0f |\n", d, dept_count[d], dept_salary[d] / dept_count[d]
    }
' data.txt > report.md
12.5 ASCII 图表
柱状图
# Bar chart of the average salary per department: one block per full 1000.
$ awk '
    # Return ch repeated n times (empty when n <= 0).
    function repeat(ch, n,    out) {
        out = ""
        while (n-- > 0) out = out ch
        return out
    }

    { sum[$2] += $3; heads[$2]++ }

    END {
        for (dept in heads) {
            mean = sum[dept] / heads[dept]
            printf "%-13s |%s %10.0f\n", dept, repeat("█", int(mean / 1000)), mean
        }
    }
' data.txt
带刻度的柱状图
# Horizontal bar chart of the salary total per department, scaled to the
# largest total, with an axis line and 0 / max labels underneath.
$ awk '
    { category[$2] += $3 }

    END {
        # Find the largest total; it maps to a full-width bar.
        max_val = 0
        for (c in category)
            if (category[c] > max_val) max_val = category[c]

        # Draw the bars.
        bar_width = 40
        for (c in category) {
            # Guard the division: when no total is positive there is no
            # maximum to scale against, so draw an empty bar.
            bar_len = (max_val > 0) ? int(category[c] / max_val * bar_width) : 0
            if (bar_len < 0) bar_len = 0
            printf "%-15s │", c
            for (i = 0; i < bar_len; i++) printf "█"
            for (i = bar_len; i < bar_width; i++) printf "░"
            printf "│ %10d\n", category[c]
        }

        # Axis line, as wide as the bars.
        printf "%-15s └", ""
        for (i = 0; i < bar_width; i++) printf "─"
        printf "┘\n"

        # Axis labels: 0 starts under the first bar cell and max_val ends
        # under the last one. The widths are spliced into the format string
        # so the labels follow bar_width.
        half = int(bar_width / 2)
        label_fmt = "%-15s  %-" half "d%" (bar_width - half) "d\n"
        printf label_fmt, "", 0, max_val
    }
' data.txt
饼图(文本版)
# Share of the total salary per department, drawn as one block per full 2%.
$ awk '
    { by_dept[$2] += $3; grand += $3 }

    END {
        printf "部门薪资占比:\n\n"
        for (dept in by_dept) {
            share = by_dept[dept] / grand * 100
            blocks = ""
            for (k = int(share / 2); k > 0; k--) blocks = blocks "■"
            printf "%-13s %5.1f%% %s\n", dept, share, blocks
        }
    }
' data.txt
12.6 报告邮件发送
#!/bin/bash
# send_report.sh — generate the HTML report and mail it.
# Reads generate_report.awk and data.txt from the current directory.
REPORT_FILE="report_$(date +%Y%m%d).html"
RECIPIENT="admin@example.com"

# Generate the report. Stop here if awk fails so that an empty or partial
# file is never mailed.
if ! awk -f generate_report.awk data.txt > "$REPORT_FILE"; then
  echo "报告生成失败" >&2
  exit 1
fi

# Send the mail (requires mailx).
# NOTE(review): -a adds a header in bsd-mailx, but other mailx implementations
# (Heirloom mailx, s-nail) use the same flag to attach a file — confirm which
# one is installed on the target host.
if command -v mailx > /dev/null 2>&1; then
  # Announce success only when mailx itself reports success.
  if mailx -s "数据报告 $(date +%Y-%m-%d)" \
    -a "Content-Type: text/html" \
    "$RECIPIENT" < "$REPORT_FILE"; then
    echo "报告已发送至 $RECIPIENT"
  else
    echo "邮件发送失败,报告已保存至: $REPORT_FILE" >&2
    exit 1
  fi
else
  echo "请安装 mailx 以发送邮件"
  echo "报告已保存至: $REPORT_FILE"
fi
12.7 综合实战
🏢 场景:月度运营报告
#!/bin/bash
# monthly_report.sh — build an HTML traffic report for the previous calendar
# month from nginx access logs.
# LOG_DIR and REPORT_DIR may be overridden from the environment.
# The field numbers used below ($1 client, $4 timestamp, $7 path, $9 status,
# $10 bytes) assume the nginx "combined" log format — adjust them for a
# custom log_format.

# Print the previous calendar month, formatted with the strftime format in $1.
# GNU date resolves "last month" from today's day-of-month, so near the end of
# a long month it can land back in the current month; anchoring on the 15th
# avoids that. The BSD fallback pins the day to the 1st before stepping back.
# LC_ALL=C keeps %b in English, the way month names appear in the logs.
last_month() {
  LC_ALL=C date -d "$(date +%Y-%m-15) -1 month" "+$1" 2>/dev/null \
    || LC_ALL=C date -v1d -v-1m "+$1"
}

MONTH=$(last_month '%Y-%m')      # e.g. 2024-05: report title and file name
LOG_MONTH=$(last_month '%b/%Y')  # e.g. May/2024: as written in log timestamps
LOG_DIR="${LOG_DIR:-/var/log/nginx}"
REPORT_DIR="${REPORT_DIR:-/var/reports}"
mkdir -p "$REPORT_DIR" || exit 1
REPORT_FILE="${REPORT_DIR}/monthly_${MONTH}.html"

# Stream every access log (current and rotated) to stdout, decompressing
# *.gz files on the way so awk never sees raw gzip data.
read_logs() {
  local f
  for f in "$LOG_DIR"/access.log*; do
    [[ -f "$f" ]] || continue   # also covers the "no file matched" case
    case "$f" in
      *.gz) gzip -cd -- "$f" ;;
      *) cat -- "$f" ;;
    esac
  done
}

{
  cat << 'HEADER'
<!DOCTYPE html>
<html><head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
.card { background: #f8f9fa; border-radius: 8px; padding: 20px; margin: 10px 0; }
h1 { color: #333; }
table { width: 100%; border-collapse: collapse; }
th, td { padding: 8px; border-bottom: 1px solid #ddd; }
th { background: #007bff; color: white; }
</style>
</head><body>
<h1>📊 月度运营报告</h1>
HEADER
  echo "<h2>${MONTH} 数据概览</h2>"
  echo "<div class='card'>"
  # Extract the statistics for the report month from the logs.
  read_logs | awk -v log_month="$LOG_MONTH" -v top_n=10 '
    # Escape the characters that are special in HTML text. Request paths come
    # straight from clients and must not be written into the page verbatim.
    function esc(s) {
      gsub(/&/, "\\&amp;", s)
      gsub(/</, "\\&lt;", s)
      gsub(/>/, "\\&gt;", s)
      return s
    }

    # $4 looks like "[10/May/2024:13:55:36"; keep only lines whose timestamp
    # falls in the report month.
    index($4, "/" log_month ":") {
      total++
      bytes += $10
      if ($9 + 0 >= 400) errors++
      if (!($1 in ips)) visitors++
      ips[$1] = 1
      paths[$7]++
    }

    END {
      printf "<p>总请求数: <strong>%d</strong></p>\n", total
      printf "<p>独立访客: <strong>%d</strong></p>\n", visitors
      printf "<p>错误请求: <strong>%d</strong></p>\n", errors
      printf "<p>总流量: <strong>%.2f GB</strong></p>\n", bytes / 1073741824
      print "</div>"
      print "<h2>热门页面</h2>"
      print "<table><tr><th>页面</th><th>访问次数</th></tr>"
      # List the top_n most requested paths, most popular first: pick the
      # current maximum, print it, remove it, repeat.
      for (rank = 1; rank <= top_n; rank++) {
        best = ""
        best_hits = 0
        for (p in paths)
          if (paths[p] > best_hits) { best = p; best_hits = paths[p] }
        if (best_hits == 0) break
        printf "<tr><td>%s</td><td>%d</td></tr>\n", esc(best), best_hits
        delete paths[best]
      }
      print "</table>"
    }'
  echo "</body></html>"
} > "$REPORT_FILE"
echo "报告已生成: ${REPORT_FILE}"
12.8 报告生成速查
# Text table
awk '{ printf("%-20s %10d\n", $1, $2) }' data.txt
# CSV
awk '{ printf("%s,%s,%d\n", $1, $2, $3) }' data.txt > report.csv
# HTML table
awk '
  BEGIN { print "<table>" }
        { printf("<tr><td>%s</td><td>%d</td></tr>\n", $1, $2) }
  END   { print "</table>" }
' data.txt
# Markdown table
awk '
  BEGIN { printf("| Name | Value |\n|------|-------|\n") }
        { printf("| %s | %d |\n", $1, $2) }
' data.txt
# ASCII bar chart: one block per started 10 units of column 2
awk '{
  printf("%-15s ", $1)
  for (n = 0; n < $2 / 10; n++) printf("█")
  printf("\n")
}' data.txt
扩展阅读
下一章:第 13 章:脚本编写 — 错误处理、调试、可维护性、模块化。