OpenResty 高性能网关开发教程 / 第 09 章 - 缓存策略

第 09 章 - 缓存策略

9.1 缓存架构总览

客户端 → CDN → 网关缓存 → 应用缓存 → 数据库
           │        │           │
         命中率高  命中率中    命中率高
         毫秒级    毫秒级      毫秒级

缓存层次

层次	位置	存储	延迟	容量
L1	进程内	Lua table / LRU	微秒	小 (MB)
L2	共享内存	ngx.shared.DICT	微秒	中 (百 MB)
L3	Redis	Redis	毫秒	大 (GB)
L4	Nginx 代理缓存	磁盘/内存	毫秒	大

9.2 共享内存缓存（ngx.shared.DICT）

共享内存是 OpenResty 中最常用的缓存方式，多个 Worker 进程共享。

9.2.1 配置共享内存

http {
    # Shared memory zones, one per purpose
    lua_shared_dict cache_data    50m;   # general-purpose data cache
    lua_shared_dict cache_counter 10m;   # counters
    lua_shared_dict rate_limit    10m;   # rate limiting
    lua_shared_dict jwt_cache     5m;    # JWT cache

    # Note: a zone's size cannot be adjusted dynamically once configured.
    # Size each zone according to the expected number and size of keys.
}

9.2.2 基本操作

local cache = ngx.shared.cache_data

-- Write (the optional third argument is the TTL in seconds)
cache:set("user:1001", '{"name":"Alice","age":30}', 300)  -- expires after 300 s
cache:set("counter:api", 0)

-- Read
local value, flags = cache:get("user:1001")
if value then
    ngx.say(value)
else
    ngx.say("cache miss")
end

-- Write only if the key does not exist yet (usable as a simple lock)
local ok, err = cache:add("lock:resource1", "locked", 10)
if ok then
    -- Lock acquired
    -- ... do the work ...
    cache:delete("lock:resource1")
else
    ngx.say("Resource locked, retry later")
end

-- Atomic increment
local new_val, err = cache:incr("counter:api", 1, 0)
-- Arguments: key, increment, initial value (used when the key is missing),
-- and an optional TTL for a newly initialised key

-- Change the TTL of an existing key
cache:expire("user:1001", 600)

-- Delete
cache:delete("user:1001")

-- Purge expired keys
cache:flush_expired(100)  -- remove at most 100 expired entries

-- Usage information
local capacity = cache:capacity()     -- total capacity in bytes
local free = cache:free_space()       -- free space in bytes
-- Number of keys currently stored. get_keys(0) returns every key and holds
-- the dict lock while it runs, so keep it to debugging/admin code paths.
local count = #cache:get_keys(0)

9.2.3 LRU 缓存（进程内）

对于热数据，使用进程内 LRU 缓存避免共享内存的序列化开销。

-- /usr/local/openresty/lua/cache/lru_cache.lua
local lrucache = require "resty.lrucache"

-- LRU store private to this worker process, holding at most 1000 entries.
local cache, err = lrucache.new(1000)
if not cache then
    error("Failed to create LRU cache: " .. err)
end

local _M = {}

--- Look up `key` in the LRU store; forwards whatever the store returns.
_M.get = function(key)
    return cache:get(key)
end

--- Store `value` under `key`. `ttl` is in seconds and defaults to 60.
_M.set = function(key, value, ttl)
    if not ttl then
        ttl = 60
    end
    cache:set(key, value, ttl)
end

--- Remove `key` from the LRU store.
_M.delete = function(key)
    cache:delete(key)
end

--- Multi-level lookup: worker-local LRU -> shared dict -> backend.
-- @param key        cache key
-- @param fetch_func zero-argument function called when both cache levels miss;
--                   a falsy result is returned to the caller but not cached
-- @param l1_ttl     TTL in seconds for the LRU entry (default 10)
-- @param l2_ttl     TTL in seconds for the shared-dict entry (default 60)
-- @return value, source  where source is "l1", "l2" or "miss"
function _M.get_multi_level(key, fetch_func, l1_ttl, l2_ttl)
    l1_ttl = l1_ttl or 10
    l2_ttl = l2_ttl or 60

    -- L1: worker-local LRU
    local value = cache:get(key)
    if value then
        return value, "l1"
    end

    -- L2: shared dict
    local shared = ngx.shared.cache_data
    value = shared:get(key)
    if value then
        -- Back-fill L1 so the next lookup in this worker stays local
        cache:set(key, value, l1_ttl)
        return value, "l2"
    end

    -- L3: backend
    value = fetch_func()
    if value then
        cache:set(key, value, l1_ttl)
        -- The shared dict only accepts scalar values; a table (which the LRU
        -- store accepts) is rejected here, so surface the failure instead of
        -- silently running without an L2 entry.
        local ok, err = shared:set(key, value, l2_ttl)
        if not ok then
            ngx.log(ngx.WARN, "L2 cache set failed for key ", key, ": ", err)
        end
    end

    return value, "miss"
end

return _M

9.3 Nginx 代理缓存

使用 Nginx 的 proxy_cache 缓存后端响应。

http {
    # Cache zone definition
    proxy_cache_path /var/cache/nginx levels=1:2
                     keys_zone=api_cache:100m   # 100 MB of shared memory
                     max_size=10g               # at most 10 GB on disk
                     inactive=60m               # drop entries not accessed for 60 minutes
                     use_temp_path=off;

    # 1 when the request carries "Cache-Control: no-cache" (case-insensitive), else 0.
    # proxy_cache_bypass only checks whether its arguments are non-empty and not "0";
    # it cannot evaluate a regex itself, so the match has to happen in a map.
    map $http_cache_control $bypass_cache {
        default       0;
        "~*no-cache"  1;
    }

    upstream backend {
        server 10.0.1.1:8080;
    }

    server {
        listen 8080;

        # Cacheable API
        location /api/products {
            proxy_pass http://backend;

            # Enable caching
            proxy_cache api_cache;
            proxy_cache_key "$scheme$host$request_uri";
            proxy_cache_valid 200 5m;       # cache 200 responses for 5 minutes
            proxy_cache_valid 404 1m;       # cache 404 responses for 1 minute
            proxy_cache_valid any 30s;      # any other status: 30 seconds

            # Cache lock (protects the backend when a hot entry is missing)
            proxy_cache_lock on;
            proxy_cache_lock_timeout 5s;
            proxy_cache_lock_age 5s;

            # Conditions for skipping the cache
            proxy_cache_bypass $bypass_cache;    # client asked for no-cache
            proxy_no_cache $http_authorization;  # do not store responses to authenticated requests

            # Expose the cache status
            add_header X-Cache-Status $upstream_cache_status;
            # HIT: served from cache  MISS: not in cache  EXPIRED: entry had expired
            # STALE: stale entry served  UPDATING: entry is being refreshed
        }

        # Uncached API (dynamic content)
        location /api/users/me {
            proxy_pass http://backend;
            proxy_cache off;
        }
    }
}

9.4 Lua 响应缓存

对于更细粒度的缓存控制，使用 Lua 实现。

-- /usr/local/openresty/lua/cache/response_cache.lua
local _M = {}

local cjson = require "cjson"

-- Cache rules, keyed by URI prefix.
--   ttl    : time-to-live in seconds for the cached response
--   vary   : optional list of request headers whose values are part of the cache key
--   method : HTTP method the rule is meant for
--            NOTE(review): the functions visible in this module compare the
--            request method against the literal "GET" and do not read this field.
local cache_rules = {
    ["/api/products"] = {
        ttl = 300,         -- cache for 5 minutes
        vary = {"Accept-Language"},  -- cache separately per language
        method = "GET",
    },
    ["/api/categories"] = {
        ttl = 600,
        method = "GET",
    },
    ["/api/config"] = {
        ttl = 60,
        method = "GET",
    },
}

--- Build a deterministic cache key from the URI, query args and Vary headers.
-- @param uri           request URI
-- @param args          query-arg table (as from ngx.req.get_uri_args()) or nil;
--                      a value may be a string, `true`, or an array for a repeated arg
-- @param vary_headers  optional array of request header names that partition the cache
-- @return string  the parts joined with "|"
local function make_cache_key(uri, args, vary_headers)
    local parts = {uri}

    -- Query args, sorted so that argument order does not change the key
    if args then
        local encoded = {}
        for k, v in pairs(args) do
            if type(v) == "table" then
                -- Repeated arg (?a=1&a=2). tostring() on the table would embed a
                -- memory address and make the key differ on every request.
                local values = {}
                for i, item in ipairs(v) do
                    values[i] = k .. "=" .. tostring(item)
                end
                encoded[#encoded + 1] = table.concat(values, "&")
            else
                encoded[#encoded + 1] = k .. "=" .. tostring(v)
            end
        end
        table.sort(encoded)
        parts[#parts + 1] = table.concat(encoded, "&")
    end

    -- Vary headers
    if vary_headers then
        local headers = ngx.req.get_headers()  -- fetched once, not per header
        for _, header in ipairs(vary_headers) do
            local header_value = headers[header:lower()]
            if type(header_value) == "table" then
                -- Header sent several times: flatten so table.concat below works
                header_value = table.concat(header_value, ",")
            end
            parts[#parts + 1] = header_value or ""
        end
    end

    return table.concat(parts, "|")
end

--- Find the cache rule for a URI.
-- Rule keys are treated as literal URI prefixes (not Lua patterns, so
-- characters such as "-" or "." in a prefix carry no special meaning).
-- When several prefixes match, the longest one wins, so the result does not
-- depend on the unspecified iteration order of pairs().
-- @param uri  request URI
-- @return the matching rule table, or nil when no rule applies
local function match_rule(uri)
    local best_prefix, best_rule
    for prefix, rule in pairs(cache_rules) do
        if uri:sub(1, #prefix) == prefix
            and (best_prefix == nil or #prefix > #best_prefix) then
            best_prefix, best_rule = prefix, rule
        end
    end
    return best_rule
end

--- Try to serve the current request from the cache.
-- Only GET requests whose URI matches a cache rule are considered.
-- Sets the X-Cache response header to "HIT" or "MISS" (and X-Cache-Key on a hit).
-- @return the decoded cached data, or nil when nothing usable is cached
function _M.get()
    if ngx.req.get_method() ~= "GET" then
        return nil
    end

    local uri = ngx.var.uri
    local rule = match_rule(uri)
    if not rule then
        return nil
    end

    local cache_key = make_cache_key(uri, ngx.req.get_uri_args(), rule.vary)
    local cached = ngx.shared.cache_data:get(cache_key)

    if not cached then
        ngx.header["X-Cache"] = "MISS"
        return nil
    end

    -- Decode before touching the headers so a decode error leaves them unset
    local data = cjson.decode(cached)
    ngx.header["X-Cache"] = "HIT"
    ngx.header["X-Cache-Key"] = cache_key
    return data
end

--- Store the response data for the current request.
-- Does nothing unless the request is a GET whose URI matches a cache rule.
-- The data is JSON-encoded and kept for the rule's TTL; a failed write is logged.
-- @param response_data  value to encode and cache
function _M.set(response_data)
    if ngx.req.get_method() ~= "GET" then
        return
    end

    local uri = ngx.var.uri
    local rule = match_rule(uri)
    if not rule then
        return
    end

    local cache_key = make_cache_key(uri, ngx.req.get_uri_args(), rule.vary)
    local payload = cjson.encode(response_data)

    local stored, reason = ngx.shared.cache_data:set(cache_key, payload, rule.ttl)
    if not stored then
        ngx.log(ngx.ERR, "Cache set failed: ", reason)
    end
end

--- Delete every cached entry whose key matches a Lua pattern.
-- @param pattern  Lua pattern tested against each key
-- @return number of entries deleted
function _M.invalidate(pattern)
    local dict = ngx.shared.cache_data
    local all_keys = dict:get_keys(0)  -- 0 = return every key
    local removed = 0

    for i = 1, #all_keys do
        local candidate = all_keys[i]
        if candidate:match(pattern) then
            dict:delete(candidate)
            removed = removed + 1
        end
    end

    return removed
end

return _M

9.5 缓存失效策略

9.5.1 TTL 过期

-- Plain TTL-based expiry
cache:set(key, value, 300)  -- the entry expires automatically after 300 seconds

9.5.2 主动失效

-- /usr/local/openresty/lua/cache/invalidation.lua
local _M = {}

--- Event-driven invalidation: call this after a resource has been modified.
-- @param resource_type  resource collection name, e.g. "products"
-- @param resource_id    identifier of the modified resource
function _M.on_resource_update(resource_type, resource_id)
    local cache = ngx.shared.cache_data
    local list_prefix = "/api/" .. resource_type

    -- Strategy 1: exact invalidation
    cache:delete(list_prefix .. "/" .. resource_id)

    -- Strategy 2: prefix invalidation (drops the related list caches).
    -- The prefix is compared literally; using it as a Lua pattern would
    -- misfire on types containing magic characters such as "-" or ".".
    local keys = cache:get_keys(0)
    for _, key in ipairs(keys) do
        if key:sub(1, #list_prefix) == list_prefix then
            cache:delete(key)
        end
    end

    -- Strategy 3: broadcast invalidation (Redis pub/sub)
    _M.broadcast_invalidation(resource_type, resource_id)
end

--- Publish a cache-invalidation event on the Redis "cache_invalidation" channel.
-- @param resource_type  resource collection name
-- @param resource_id    identifier of the modified resource
-- @return true on success, or nil plus an error message
function _M.broadcast_invalidation(resource_type, resource_id)
    -- Required here because this module has no file-level cjson import
    local cjson = require "cjson"
    local redis = require "resty.redis"

    local red = redis:new()
    red:set_timeout(1000)  -- ms; keeps a dead Redis from stalling the request

    local ok, err = red:connect("127.0.0.1", 6379)
    if not ok then
        ngx.log(ngx.ERR, "cache invalidation: redis connect failed: ", err)
        return nil, err
    end

    local payload = cjson.encode({
        resource_type = resource_type,
        resource_id = resource_id,
        timestamp = ngx.time(),
    })

    local res, pub_err = red:publish("cache_invalidation", payload)
    if not res then
        ngx.log(ngx.ERR, "cache invalidation: redis publish failed: ", pub_err)
        -- The connection state is unknown after a failed command: close it
        -- rather than returning it to the pool.
        red:close()
        return nil, pub_err
    end

    local pooled, pool_err = red:set_keepalive(10000, 100)
    if not pooled then
        ngx.log(ngx.WARN, "cache invalidation: redis set_keepalive failed: ", pool_err)
    end

    return true
end

--- Start a background timer that subscribes to the Redis "cache_invalidation"
-- channel and deletes matching entries from ngx.shared.cache_data.
-- The listener reconnects after connection errors and stops when the worker
-- is shutting down.
-- @return true when the timer was scheduled, or nil plus an error message
function _M.start_listener()
    -- Required here because this module has no file-level cjson import
    local cjson = require "cjson"
    local redis = require "resty.redis"

    local RETRY_DELAY = 1      -- seconds to wait before reconnecting
    local READ_TIMEOUT = 5000  -- ms; bounds each read so worker shutdown is noticed

    local listen  -- forward declaration: schedule() and listen() reference each other

    -- (Re)schedule the listener unless the worker is shutting down.
    local function schedule(delay)
        if ngx.worker.exiting() then
            return nil, "worker exiting"
        end
        local ok, err = ngx.timer.at(delay, listen)
        if not ok then
            ngx.log(ngx.ERR, "cache invalidation: failed to create timer: ", err)
        end
        return ok, err
    end

    -- Delete every key that contains resource_type as a literal substring.
    local function purge(resource_type)
        local cache = ngx.shared.cache_data
        local keys = cache:get_keys(0)
        for _, key in ipairs(keys) do
            -- plain find: the type is data, not a Lua pattern
            if key:find(resource_type, 1, true) then
                cache:delete(key)
            end
        end
    end

    listen = function(premature)
        if premature then return end

        local red = redis:new()
        red:set_timeout(READ_TIMEOUT)

        local ok, err = red:connect("127.0.0.1", 6379)
        if not ok then
            ngx.log(ngx.ERR, "cache invalidation: redis connect failed: ", err)
            return schedule(RETRY_DELAY)
        end

        ok, err = red:subscribe("cache_invalidation")
        if not ok then
            ngx.log(ngx.ERR, "cache invalidation: redis subscribe failed: ", err)
            red:close()
            return schedule(RETRY_DELAY)
        end

        while not ngx.worker.exiting() do
            local res, read_err = red:read_reply()
            if res then
                -- Only "message" replies carry a payload in res[3]
                if res[1] == "message" then
                    -- pcall: one malformed payload must not kill the listener
                    local decoded, data = pcall(cjson.decode, res[3])
                    if decoded and type(data) == "table"
                        and type(data.resource_type) == "string"
                        and data.resource_type ~= "" then
                        purge(data.resource_type)
                    else
                        ngx.log(ngx.WARN, "cache invalidation: ignoring malformed event")
                    end
                end
            elseif read_err ~= "timeout" then
                -- A timeout just means the channel was idle; any other error
                -- means the connection is gone, so reconnect instead of
                -- spinning on a dead socket.
                ngx.log(ngx.ERR, "cache invalidation: redis read failed: ", read_err)
                red:close()
                return schedule(RETRY_DELAY)
            end
        end

        red:close()
    end

    return schedule(0)
end

return _M

9.5.3 写入时更新（Write-Through）

--- Write-through: persist a value via the backend, then refresh the cache.
-- @param key         cache key to refresh
-- @param value       value handed to write_func
-- @param write_func  function performing the backend write; a truthy result
--                    is JSON-encoded and cached for 300 seconds
-- @return whatever write_func returned
local function write_through(key, value, write_func)
    local written = write_func(value)
    if not written then
        -- Backend write produced nothing: leave the cache untouched
        return written
    end

    ngx.shared.cache_data:set(key, cjson.encode(written), 300)
    return written
end

9.6 缓存穿透、击穿、雪崩

9.6.1 缓存穿透（Cache Penetration）

请求的数据在缓存和数据库中都不存在，每次都穿透到数据库。

解决方案：布隆过滤器 / 缓存空值

客户端 → [布隆过滤器] → 数据库
              │
         不在过滤器中 → 直接返回 404

-- /usr/local/openresty/lua/cache/bloom_filter.lua
local _M = {}

--- Create a simplified Bloom filter backed by a shared dict.
-- For production use, prefer a dedicated implementation such as RedisBloom.
-- @param shared_dict_name     name of the lua_shared_dict that stores the bits
-- @param expected_items       expected number of distinct keys (>= 1)
-- @param false_positive_rate  target false-positive rate, strictly between 0 and 1
-- @return filter object with add/exists methods, or nil plus an error message
function _M.new(shared_dict_name, expected_items, false_positive_rate)
    local shared = ngx.shared[shared_dict_name]
    if not shared then
        return nil, "shared dict not found: " .. tostring(shared_dict_name)
    end
    if type(expected_items) ~= "number" or expected_items < 1 then
        return nil, "expected_items must be a number >= 1"
    end
    if type(false_positive_rate) ~= "number"
        or false_positive_rate <= 0 or false_positive_rate >= 1 then
        return nil, "false_positive_rate must be between 0 and 1 (exclusive)"
    end

    -- Optimal parameters: m = -n * ln(p) / (ln 2)^2,  k = (m / n) * ln 2
    local ln2 = math.log(2)
    local m = math.ceil(-expected_items * math.log(false_positive_rate) / (ln2 * ln2))
    local k = math.ceil((m / expected_items) * ln2)

    -- The metatable makes the module's methods (add, exists) callable on the
    -- returned object; a plain table would have neither.
    return setmetatable({
        shared = shared,
        m = m,  -- size of the bit array
        k = k,  -- number of hash functions
    }, { __index = _M })
end

--- Seeded polynomial string hash (multiplier 31, modulus 2^31 - 1).
-- Different seeds give the different hash functions the filter needs.
-- @param key   string to hash
-- @param seed  starting value of the accumulator
-- @return the accumulator after all bytes of key have been folded in
local function hash(key, seed)
    local MODULUS = 2147483647
    local acc = seed
    local idx, len = 1, #key
    while idx <= len do
        acc = (acc * 31 + key:byte(idx)) % MODULUS
        idx = idx + 1
    end
    return acc
end

--- Add a key to the filter by setting its k bit positions.
-- Uses safe_set so that a full dict makes the call fail instead of evicting
-- other entries: an evicted bit would turn into a false negative, which a
-- Bloom filter must never produce.
-- NOTE(review): other writers using plain set() on the same dict can still
-- evict bits; presumably the filter should get a dict of its own.
-- @param key  string to add
-- @return true on success, or nil plus an error message (e.g. "no memory")
function _M:add(key)
    for i = 1, self.k do
        local pos = hash(key, i) % self.m
        local ok, err = self.shared:safe_set(tostring(pos), 1)
        if not ok then
            return nil, err
        end
    end
    return true
end

--- Test whether a key may have been added to the filter.
-- @param key  string to test
-- @return false when the key was definitely never added; true when it may
--         have been (false positives are possible)
function _M:exists(key)
    local dict, size = self.shared, self.m
    for round = 1, self.k do
        local slot = tostring(hash(key, round) % size)
        local bit_set = dict:get(slot)
        if not bit_set then
            return false
        end
    end
    return true
end

return _M

--- Cache lookup that also caches "not found" results to stop penetration.
-- @param key         cache key
-- @param fetch_func  zero-argument function that loads the value from the backend
-- @param ttl         TTL in seconds for real values (default 300)
-- @param null_ttl    TTL in seconds for the "not found" marker (default 30)
-- @return the value; or nil, "cached null" when a cached marker was hit
local function get_with_null_cache(key, fetch_func, ttl, null_ttl)
    local NULL_MARKER = "__NULL__"
    local dict = ngx.shared.cache_data

    -- Cached marker or cached value?
    local cached = dict:get(key)
    if cached == NULL_MARKER then
        return nil, "cached null"
    elseif cached then
        return cjson.decode(cached)
    end

    -- Nothing cached: ask the backend
    local fresh = fetch_func()

    if fresh then
        dict:set(key, cjson.encode(fresh), ttl or 300)
    else
        -- Remember the miss, but only briefly
        dict:set(key, NULL_MARKER, null_ttl or 30)
    end

    return fresh
end

9.6.2 缓存击穿（Cache Breakdown）

热点 key 过期时，大量请求同时穿透到数据库。

-- /usr/local/openresty/lua/cache/cache_lock.lua
local _M = {}

--- Singleflight: let one request load a missing key while the others wait.
-- @param key         cache key
-- @param fetch_func  zero-argument function that loads the value from the backend
-- @param ttl         TTL in seconds for the cached value (default 300)
-- @return value, status  where status is "hit" (already cached), "miss" (this
--         request loaded it), "wait_hit" (another request loaded it while this
--         one waited) or "timeout" (gave up waiting and loaded it itself)
function _M.singleflight(key, fetch_func, ttl)
    -- Required here because this module has no file-level cjson import
    local cjson = require "cjson"

    local LOCK_TTL = 5          -- seconds; upper bound on how long a lock can linger
    local POLL_INTERVAL = 0.1   -- seconds between polls
    local MAX_POLLS = 50        -- 50 * 100 ms = 5 s

    local cache = ngx.shared.cache_data
    local lock_key = "lock:" .. key
    ttl = ttl or 300

    -- Fast path: already cached
    local value = cache:get(key)
    if value then
        return cjson.decode(value), "hit"
    end

    -- Load from the backend and cache a truthy result.
    local function fetch_and_store()
        local result = fetch_func()
        if result then
            local ok, err = cache:set(key, cjson.encode(result), ttl)
            if not ok then
                ngx.log(ngx.WARN, "singleflight: cache set failed for key ", key, ": ", err)
            end
        end
        return result
    end

    for _ = 1, MAX_POLLS do
        -- Retrying the lock on every round lets exactly one waiter take over
        -- when the previous holder failed, instead of all waiters idling
        -- until the timeout.
        local locked = cache:add(lock_key, 1, LOCK_TTL)
        if locked then
            -- pcall so the lock is released even when fetch_func raises
            local ok, result = pcall(fetch_and_store)
            cache:delete(lock_key)
            if not ok then
                error(result, 0)  -- re-raise the original error unchanged
            end
            return result, "miss"
        end

        -- Somebody else holds the lock: wait, then look for their result
        ngx.sleep(POLL_INTERVAL)
        value = cache:get(key)
        if value then
            return cjson.decode(value), "wait_hit"
        end
    end

    -- Timed out: load it ourselves (and cache it for later requests).
    -- Parentheses-free call in a list position keeps only the first result.
    local result = fetch_and_store()
    return result, "timeout"
end

return _M

9.6.3 缓存雪崩（Cache Avalanche）

大量缓存同时过期，请求全部打到数据库。

-- /usr/local/openresty/lua/cache/avalanche_protection.lua
local _M = {}

--- Add random jitter to a TTL so that entries do not all expire together.
-- @param base_ttl        base TTL in seconds
-- @param jitter_percent  jitter as a fraction of base_ttl (default 0.1 = 10%)
-- @return base_ttl shifted by a random whole number of seconds in
--         [-jitter, +jitter], never below 1; base_ttl itself when the jitter
--         rounds down to zero
-- NOTE(review): math.random is not seeded here; presumably each worker should
-- call math.randomseed once at start-up — confirm.
function _M.random_ttl(base_ttl, jitter_percent)
    jitter_percent = jitter_percent or 0.1
    -- math.random needs integer bounds with low <= high
    local jitter = math.floor(math.abs(base_ttl * jitter_percent))
    if jitter == 0 then
        return base_ttl
    end
    -- Clamp: a shared-dict TTL of 0 means "never expire", the opposite of
    -- what a jittered TTL is meant to achieve.
    return math.max(1, base_ttl + math.random(-jitter, jitter))
end

--- Cache warm-up: preload a list of keys with jittered TTLs.
-- @param keys        array of cache keys
-- @param fetch_func  function(key) that returns the value to cache; a falsy
--                    result is skipped
-- @param ttl         base TTL in seconds (default 300)
-- @return number of entries written
function _M.warmup(keys, fetch_func, ttl)
    -- Required here because this module has no file-level cjson import
    local cjson = require "cjson"
    local cache = ngx.shared.cache_data
    ttl = ttl or 300

    local warmed = 0
    for _, key in ipairs(keys) do
        local value = fetch_func(key)
        if value then
            local ok, err = cache:set(key, cjson.encode(value), _M.random_ttl(ttl))
            if ok then
                warmed = warmed + 1
            else
                ngx.log(ngx.WARN, "warmup: cache set failed for key ", key, ": ", err)
            end
        end
    end

    return warmed
end

--- Degradation strategy: serve a stale copy when the backend is unavailable.
-- Each fresh value is stored twice: under `key` with a jittered TTL, and
-- under "stale:" .. key with a much longer TTL. The second copy is what gets
-- served when the backend fails after the first one has expired.
-- @param key         cache key
-- @param fetch_func  zero-argument function that loads the value; may raise
-- @param ttl         base TTL in seconds (default 300)
-- @return the value (fresh, cached or stale), or nil when none is available
function _M.get_with_fallback(key, fetch_func, ttl)
    -- Required here because this module has no file-level cjson import
    local cjson = require "cjson"

    local STALE_TTL_FACTOR = 10  -- stale copies live 10x longer than fresh ones

    local cache = ngx.shared.cache_data
    local stale_key = "stale:" .. key
    ttl = ttl or 300

    local value = cache:get(key)
    if value then
        return cjson.decode(value)
    end

    -- pcall keeps a backend failure from aborting the request
    local ok, result = pcall(fetch_func)

    if ok and result then
        local encoded = cjson.encode(result)
        local set_ok, set_err = cache:set(key, encoded, _M.random_ttl(ttl))
        if not set_ok then
            ngx.log(ngx.WARN, "fallback: cache set failed for key ", key, ": ", set_err)
        end
        -- Without this write the stale lookup below could never succeed
        set_ok, set_err = cache:set(stale_key, encoded, ttl * STALE_TTL_FACTOR)
        if not set_ok then
            ngx.log(ngx.WARN, "fallback: stale copy set failed for key ", key, ": ", set_err)
        end
        return result
    end

    if not ok then
        ngx.log(ngx.ERR, "fallback: backend fetch failed for key ", key, ": ", tostring(result))
    end

    -- Backend failed or returned nothing: fall back to the stale copy
    local stale = cache:get(stale_key)
    if stale then
        ngx.header["X-Cache-Stale"] = "true"
        return cjson.decode(stale)
    end

    return nil
end

return _M

9.7 缓存监控

-- /usr/local/openresty/lua/cache/monitor.lua
local _M = {}

local cjson = require "cjson"

--- Collect usage figures for a shared dict.
-- @param dict_name  name of the lua_shared_dict
-- @return table with capacity, free_space and keys (number of stored keys),
--         or nil, "Cache not found" when the dict does not exist
function _M.get_stats(dict_name)
    local dict = ngx.shared[dict_name]
    if dict then
        local stats = {}
        stats.capacity = dict:capacity()
        stats.free_space = dict:free_space()
        stats.keys = #dict:get_keys(0)
        return stats
    end

    return nil, "Cache not found"
end

-- Dict holding the hit-rate counters. Prefers the dedicated cache_counter
-- zone so the counters are not wiped when the data cache is flushed; falls
-- back to cache_data when that zone is not configured.
local function hit_rate_dict()
    return ngx.shared.cache_counter or ngx.shared.cache_data
end

--- Hit-rate middleware: count the current request and, when the X-Cache
-- response header is "HIT", count a cache hit as well.
function _M.track_hit_rate()
    local cache_hit = ngx.header["X-Cache"]
    local stats = hit_rate_dict()

    stats:incr("total_requests", 1, 0)

    if cache_hit == "HIT" then
        stats:incr("cache_hits", 1, 0)
    end
end

--- Report the cache hit rate.
-- @return table with total, hits, misses and hit_rate (percentage, 0 when no
--         requests have been counted)
function _M.get_hit_rate()
    local stats = hit_rate_dict()
    local total = stats:get("total_requests") or 0
    local hits = stats:get("cache_hits") or 0

    return {
        total = total,
        hits = hits,
        misses = total - hits,
        hit_rate = total > 0 and (hits / total * 100) or 0,
    }
end

--- Cache administration endpoint.
--   GET    -> usage statistics and hit rate as JSON
--   DELETE -> flush the selected dict
-- The dict is chosen with the `dict` query argument (default "cache_data").
-- An unknown dict yields 404 and any other method yields 405.
-- SECURITY: this endpoint can flush any shared dict by name, so the location
-- serving it must be restricted to trusted clients.
function _M.admin_api()
    local method = ngx.req.get_method()
    local args = ngx.req.get_uri_args()

    -- `dict` may be a table (argument repeated) or `true` (argument without a value)
    local dict_name = args.dict
    if type(dict_name) == "table" then
        dict_name = dict_name[1]
    end
    if type(dict_name) ~= "string" or dict_name == "" then
        dict_name = "cache_data"
    end

    ngx.header["Content-Type"] = "application/json"

    local cache = ngx.shared[dict_name]
    if not cache then
        ngx.status = ngx.HTTP_NOT_FOUND
        ngx.say(cjson.encode({error = "Cache not found"}))
        return
    end

    if method == "GET" then
        local stats = _M.get_stats(dict_name)
        local hit_rate = _M.get_hit_rate()
        ngx.say(cjson.encode({
            stats = stats,
            hit_rate = hit_rate,
        }))
    elseif method == "DELETE" then
        cache:flush_all()
        -- flush_all only marks entries as expired; this releases their memory
        cache:flush_expired()
        ngx.say(cjson.encode({message = "Cache flushed"}))
    else
        ngx.status = ngx.HTTP_NOT_ALLOWED
        ngx.header["Allow"] = "GET, DELETE"
        ngx.say(cjson.encode({error = "Method not allowed"}))
    end
end

return _M

9.8 注意事项

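共享内存容量：ngx.shared.DICT 的内存是预分配的。空间不足时，set/add 会按 LRU 强制淘汰尚未过期的条目（此时 set 的第三个返回值 forcible 为 true），仍腾不出空间才返回 "no memory"；safe_set/safe_add 则不淘汰、直接失败。定期检查 free_space 并设置合理的过期时间。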

序列化开销：缓存复杂对象需要 JSON 序列化/反序列化，高频场景考虑使用进程内 LRU 缓存减少序列化开销。

缓存一致性：多节点部署时，缓存失效不能只清理本地，需要通过 Redis pub/sub 或消息队列广播失效事件。

大对象缓存：单个 value 不宜超过 1MB，否则会影响共享内存性能。大对象使用 Nginx 代理缓存或 Redis。

上一章：← 第 08 章 - 反向代理与负载均衡 下一章：第 10 章 - 数据转换与协议处理 →