LevelDB 完全指南 / 第 6 章 · 自定义比较器

第 6 章 · 自定义比较器

6.1 默认比较器

LevelDB 默认使用 BytewiseComparator，即按字节逐位比较：

字典序比较规则：
  "apple" < "banana"    (因为 'a' < 'b')
  "abc"   < "abd"       (因为 'c' < 'd')
  "ab"    < "abc"       (短字符串 < 长字符串)
  "a\x00" < "a\x01"    (按字节值比较)
  "A"     < "a"         (大写字母 < 小写字母，因为 ASCII 码 65 < 97)

数字排序的问题

// 按字典序排列数字，结果可能不符合预期
db->Put(wopts, "key:1", "...");
db->Put(wopts, "key:2", "...");  
db->Put(wopts, "key:10", "...");
db->Put(wopts, "key:9", "...");

// 遍历顺序：key:1 → key:10 → key:2 → key:9  ← 字典序！
// 期望顺序：key:1 → key:2 → key:9 → key:10   ← 数值序

6.2 自定义比较器实现

C++ 实现：数值排序比较器

#include "leveldb/comparator.h"
#include "leveldb/db.h"
#include <cstring>
#include <cstdlib>

class NumericComparator : public leveldb::Comparator {
public:
    // 核心比较函数
    int Compare(const leveldb::Slice& a, const leveldb::Slice& b) const override {
        // 假设 key 格式为 "prefix:数字"
        int num_a = ExtractNumber(a);
        int num_b = ExtractNumber(b);

        if (num_a < num_b) return -1;
        if (num_a > num_b) return 1;

        // 数字相同时，按字节序比较（保证唯一性）
        return a.compare(b);
    }

    // 比较器名称（必须唯一，用于数据库兼容性检查）
    const char* Name() const override {
        return "NumericComparator";
    }

    // 以下方法可留空（高级优化）
    void FindShortestSeparator(std::string*, const leveldb::Slice&) const override {}
    void FindShortSuccessor(std::string*) const override {}

private:
    int ExtractNumber(const leveldb::Slice& key) const {
        std::string s = key.ToString();
        auto pos = s.rfind(':');
        if (pos == std::string::npos) return 0;
        return std::atoi(s.substr(pos + 1).c_str());
    }
};

int main() {
    NumericComparator cmp;
    leveldb::Options options;
    options.create_if_missing = true;
    options.comparator = &cmp;  // 使用自定义比较器

    leveldb::DB* db;
    leveldb::DB::Open(options, "/tmp/numeric_db", &db);

    db->Put(wopts, "item:10", "...");
    db->Put(wopts, "item:2", "...");
    db->Put(wopts, "item:1", "...");
    db->Put(wopts, "item:9", "...");

    auto* it = db->NewIterator(ropts);
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
        std::cout << it->key().ToString() << std::endl;
    }
    // 输出: item:1 → item:2 → item:9 → item:10  ✅

    delete it;
    delete db;
}

Go 实现

type NumericComparer struct{}

func (c NumericComparer) Compare(a, b []byte) int {
    numA := extractNumber(a)
    numB := extractNumber(b)
    if numA < numB { return -1 }
    if numA > numB { return 1 }
    return bytes.Compare(a, b)
}

func (c NumericComparer) Name() string { return "NumericComparer" }

func (c NumericComparer) Separator(dst, a, b []byte) []byte { return nil }
func (c NumericComparer) Successor(dst, b []byte) []byte { return nil }

// 打开数据库时使用
db, err := leveldb.OpenFile("/tmp/db", &opt.Options{
    Comparer: NumericComparer{},
})

6.3 Key 设计最佳实践

原则一：可读前缀 + 业务标识

推荐的 Key 设计模式：

  {资源类型}:{资源ID}:{属性名}
  
示例：
  user:1001:name
  user:1001:email
  order:2026051001:status
  product:P001:price
  config:max_retry

原则二：时间戳 Key 需要补齐

// ❌ 错误：时间戳不补齐
std::string key = "log:" + std::to_string(timestamp);
// "log:1683724800" vs "log:9999999999"  → 字典序不等于时间序

// ✅ 正确：补零到固定长度
char key[32];
snprintf(key, sizeof(key), "log:%020ld", timestamp);
// "log:0000000001683724800" vs "log:0000000009999999999"

原则三：反向时间序 Key

// 场景：需要按时间从新到旧遍历
// 方法：使用 (MAX - timestamp) 作为 Key

uint64_t now = time(nullptr);
uint64_t reverse_ts = UINT64_MAX - now;

char key[32];
snprintf(key, sizeof(key), "rtlog:%020lu", reverse_ts);
// 遍历时 SeekToFirst 就是从最新开始

原则四：复合 Key 实现多维排序

// 场景：先按用户 ID 排序，再按时间排序
// Key 格式: user:{user_id:固定8字节}:{timestamp:固定8字节}

std::string MakeKey(uint32_t user_id, uint64_t timestamp) {
    std::string key(16, '\0');
    // 大端编码保证字典序 = 数值序
    key[0] = (user_id >> 24) & 0xFF;
    key[1] = (user_id >> 16) & 0xFF;
    key[2] = (user_id >> 8) & 0xFF;
    key[3] = user_id & 0xFF;
    key[4] = (timestamp >> 56) & 0xFF;
    key[5] = (timestamp >> 48) & 0xFF;
    // ... 依次填充
    return key;
}

6.4 Key 设计模式速查表

模式	Key 格式	用途	遍历方式
简单 KV	`key`	配置项	单点查询
资源属性	`type:id:attr`	多字段实体	前缀扫描
时间序列	`type:YYYYMMDDHHmmss`	日志/事件	范围扫描
反向时间	`type:(MAX-ts)`	最新优先	前缀扫描
复合索引	`idx:type:field1:field2`	多维查询	前缀扫描
反转域名	`com.example.www`	域名存储	前缀扫描

6.5 业务场景：电商订单系统

class OrderStore {
public:
    OrderStore(leveldb::DB* db) : db_(db) {}

    // 创建订单
    bool CreateOrder(uint64_t order_id, uint32_t user_id,
                     double amount, const std::string& status) {
        leveldb::WriteBatch batch;
        char ts[32];
        snprintf(ts, sizeof(ts), "%020lu", time(nullptr));

        // 主记录
        std::string main_key = "order:" + Uint64ToKey(order_id);
        std::string main_val = SerializeOrder(order_id, user_id, amount, status, ts);
        batch.Put(main_key, main_val);

        // 用户订单索引：支持按用户查询
        std::string user_idx = "idx:user_order:" + Uint32ToKey(user_id)
                              + ":" + Uint64ToKey(order_id);
        batch.Put(user_idx, "");

        // 时间索引：支持按时间查询
        std::string time_idx = "idx:time_order:" + std::string(ts)
                              + ":" + Uint64ToKey(order_id);
        batch.Put(time_idx, "");

        // 状态索引：支持按状态查询
        std::string status_idx = "idx:status:" + status
                                + ":" + Uint64ToKey(order_id);
        batch.Put(status_idx, main_val);

        return db_->Write(leveldb::WriteOptions(), &batch).ok();
    }

    // 查询用户的所有订单
    std::vector<uint64_t> GetUserOrders(uint32_t user_id, int limit = 20) {
        std::vector<uint64_t> orders;
        std::string prefix = "idx:user_order:" + Uint32ToKey(user_id) + ":";
        auto* it = db_->NewIterator(leveldb::ReadOptions());
        for (it->Seek(prefix);
             it->Valid() && it->key().starts_with(prefix) && orders.size() < limit;
             it->Next()) {
            uint64_t oid = KeyToUint64(it->key().ToString().substr(prefix.size()));
            orders.push_back(oid);
        }
        delete it;
        return orders;
    }

private:
    leveldb::DB* db_;

    std::string Uint32ToKey(uint32_t v) {
        std::string s(4, '\0');
        s[0] = (v >> 24); s[1] = (v >> 16);
        s[2] = (v >> 8);  s[3] = v;
        return s;
    }
    std::string Uint64ToKey(uint64_t v) {
        std::string s(8, '\0');
        for (int i = 7; i >= 0; i--) {
            s[i] = v & 0xFF; v >>= 8;
        }
        return s;
    }
    uint64_t KeyToUint64(const std::string& s) {
        uint64_t v = 0;
        for (int i = 0; i < 8; i++) v = (v << 8) | (uint8_t)s[i];
        return v;
    }
    std::string SerializeOrder(...) { /* JSON / Protobuf 序列化 */ return ""; }
};

6.6 注意事项

⚠️ 警告	说明
比较器名称不可变	打开已有数据库时，比较器的 `Name()` 必须与创建时一致
不可混用比较器	用不同比较器打开同一数据库会导致数据损坏
性能敏感	`Compare()` 会被频繁调用，避免复杂计算
一致性保证	比较器必须满足严格弱序（Strict Weak Ordering）

6.7 本章小结

要点	内容
默认比较器	BytewiseComparator，字典序
自定义比较器	继承 `Comparator` 接口，实现 `Compare()` 和 `Name()`
Key 设计	可读前缀、固定长度编码、复合索引
常见模式	资源属性、时间序列、反向排序、多维索引

扩展阅读

Comparator 接口：include/leveldb/comparator.h
BytewiseComparator 实现：util/comparator.cc
Key 编码设计：CockroachDB 的 Key 编码方案

← 第 5 章 · 批量写入 | 第 7 章 · 快照与 MVCC →