强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧.
文档目录

PostGIS 完全指南 / 第 14 章:性能优化

第 14 章:性能优化

14.1 性能优化总览

空间查询性能优化的核心思路:

优化层次
├── 1. 数据层优化
│   ├── 选择合适的几何类型
│   ├── 选择合适的 SRID(投影 vs 地理)
│   └── 简化几何精度
│
├── 2. 索引层优化
│   ├── 创建空间索引
│   ├── 使用部分索引
│   └── 选择合适的索引类型
│
├── 3. 查询层优化
│   ├── 使用空间索引操作符
│   ├── 两步过滤法
│   └── 避免函数包裹索引列
│
├── 4. 架构层优化
│   ├── 表分区
│   ├── 读写分离
│   └── 预计算
│
└── 5. 配置层优化
    ├── shared_buffers
    ├── work_mem
    └── effective_cache_size

14.2 几何简化

ST_Simplify

-- 使用 Douglas-Peucker 算法简化几何
-- 原始多边形可能有数千个点
SELECT ST_NPoints(geom) AS original_points
FROM districts WHERE name = '北京市';

-- 简化(容差越大,简化越激进)
SELECT
    ST_NPoints(ST_Simplify(geom, 0.001)) AS simplified_1,
    ST_NPoints(ST_Simplify(geom, 0.005)) AS simplified_2,
    ST_NPoints(ST_Simplify(geom, 0.01))  AS simplified_3
FROM districts WHERE name = '北京市';

-- 创建简化后的列用于快速查询
ALTER TABLE districts ADD COLUMN geom_simple GEOMETRY(MultiPolygon, 4326);

UPDATE districts
SET geom_simple = ST_Simplify(geom, 0.001);

CREATE INDEX idx_districts_geom_simple ON districts USING GIST(geom_simple);

-- 近似查询使用简化几何
SELECT name FROM districts
WHERE geom_simple && ST_SetSRID(ST_MakePoint(116.4, 39.9), 4326);

-- 精确查询使用原始几何
SELECT name FROM districts
WHERE geom_simple && ST_SetSRID(ST_MakePoint(116.4, 39.9), 4326)
  AND ST_Contains(geom, ST_SetSRID(ST_MakePoint(116.4, 39.9), 4326));

ST_SimplifyPreserveTopology

-- 保持拓扑的简化(避免自相交)
SELECT ST_SimplifyPreserveTopology(geom, 0.001)
FROM districts;

精度降低

-- 降低坐标精度(减少小数位数)
-- 适用于初始数据精度远超实际需求的场景
UPDATE pois
SET geom = ST_GeomFromText(
    ST_AsText(geom, 6),  -- 保留 6 位小数(约 0.1 米精度)
    4326
);

14.3 索引策略

空间索引类型选择

-- GiST(默认推荐)
CREATE INDEX idx_table_geom ON table USING GIST(geom);

-- SP-GiST(大量随机点)
CREATE INDEX idx_table_geom ON table USING SPGIST(geom);

-- BRIN(物理有序的大表)
CREATE INDEX idx_table_geom ON table USING BRIN(geom);

部分索引

-- 只对活跃数据建索引
CREATE INDEX idx_events_active_geom
ON events USING GIST(geom)
WHERE status = 'active' AND created_at > now() - INTERVAL '30 days';

-- 只对特定类型建索引
CREATE INDEX idx_poi_restaurant_geom
ON pois USING GIST(geom)
WHERE category = '餐厅';

多列索引策略

-- PostgreSQL 不支持 GiST + B-tree 混合索引
-- 替代方案:分步查询

-- 方案 1: 先 B-tree 过滤,再 GiST 过滤
SELECT * FROM stores
WHERE store_type = '旗舰'           -- B-tree 索引
  AND geom && target_geom;           -- GiST 索引

CREATE INDEX idx_stores_type ON stores(store_type);
CREATE INDEX idx_stores_geom ON stores USING GIST(geom);

-- 方案 2: 使用 BRIN 索引(如果数据物理有序)
-- 适用于按时间插入的 GPS 轨迹数据

14.4 查询优化技巧

两步过滤法

-- ❌ 慢:直接使用复杂空间函数
SELECT name FROM stores
WHERE ST_Contains(
    (SELECT geom FROM districts WHERE name = '朝阳区'),
    stores.geom
);

-- ✅ 快:先用边界框过滤,再用精确函数
SELECT name FROM stores
WHERE geom && (SELECT geom FROM districts WHERE name = '朝阳区')
  AND ST_Contains(
      (SELECT geom FROM districts WHERE name = '朝阳区'),
      geom
  );

避免对索引列使用函数

-- ❌ 错误:函数包裹索引列,索引无法使用
SELECT name FROM stores
WHERE ST_AsText(geom) LIKE 'POINT(116%';

-- ✅ 正确:直接对几何列使用索引操作符
SELECT name FROM stores
WHERE geom && ST_MakeEnvelope(116, 39, 117, 40, 4326);

-- ❌ 错误:类型转换包裹索引列
SELECT name FROM stores
WHERE geom::geography && target::geography;

-- ✅ 正确:创建 Geography 列或使用 Geography 索引
ALTER TABLE stores ADD COLUMN geog geography(Point, 4326);
UPDATE stores SET geog = geom::geography;
CREATE INDEX idx_stores_geog ON stores USING GIST(geog);

使用 LATERAL JOIN 优化

-- ❌ 慢:相关子查询
SELECT d.name,
    (SELECT s.name FROM stores s
     ORDER BY s.geom <-> d.geom LIMIT 1) AS nearest_store
FROM districts d;

-- ✅ 快:LATERAL JOIN
SELECT d.name, s.name AS nearest_store
FROM districts d
CROSS JOIN LATERAL (
    SELECT name FROM stores
    ORDER BY geom <-> d.geom
    LIMIT 1
) s;

KNN 查询优化

-- 使用 <-> 操作符进行 KNN 查询
-- 必须确保 ORDER BY ... LIMIT 才能利用索引
SELECT name, ST_Distance(geom::geography, target::geography) AS distance
FROM stores
ORDER BY geom <-> target  -- 利用 GiST 索引排序
LIMIT 10;

-- 对比:不使用 LIMIT 时,<-> 不会利用索引
SELECT name, ST_Distance(geom::geography, target::geography) AS distance
FROM stores
WHERE ST_DWithin(geom::geography, target::geography, 5000)  -- 先缩小范围
ORDER BY geom <-> target  -- 再 KNN 排序
LIMIT 10;

14.5 查询计划分析

EXPLAIN ANALYZE 详解

EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT TEXT)
SELECT name FROM stores
WHERE ST_DWithin(
    geom::geography,
    ST_SetSRID(ST_MakePoint(116.4074, 39.9042), 4326)::geography,
    3000
);

关键指标解读

Index Scan using idx_stores_geom on public.stores
  Output: name
  Index Cond: (stores.geom && st_expand(...))
  Filter: st_dwithin((stores.geom)::geography, ..., 3000)
  Rows Removed by Filter: 5
  Heap Fetches: 0
  Shared Hit Blocks: 12
  Shared Read Blocks: 0
  Planning Time: 0.234 ms
  Execution Time: 0.876 ms
指标健康值问题指示
Index ScanSeq Scan = 未使用索引
Rows Removed by Filter< 100高值 = 索引选择性差
Heap Fetches0高值 = 需要 VACUUM
Shared HitShared Read = 缓存未命中
Planning Time< 5ms高值 = 查询太复杂
Execution Time取决于场景关注趋势变化

常见问题排查

-- 问题:Seq Scan 而非 Index Scan
-- 可能原因 1: 未创建索引
CREATE INDEX idx_geom ON table USING GIST(geom);

-- 可能原因 2: 统计信息过时
ANALYZE table_name;

-- 可能原因 3: 表太小,优化器认为全表扫描更快
-- 这是正常的,无需优化

-- 可能原因 4: 函数包裹了索引列
-- 参考上文"避免对索引列使用函数"

-- 问题:Index Scan 但很慢
-- 可能原因: 查询范围太大,返回行数过多
-- 解决: 缩小查询范围或增加 LIMIT

14.6 表分区

按范围分区(时间序列空间数据)

-- 创建分区父表
CREATE TABLE gps_tracks (
    id BIGSERIAL,
    device_id INTEGER,
    recorded_at TIMESTAMPTZ NOT NULL,
    speed NUMERIC(5,2),
    geom GEOMETRY(Point, 4326)
) PARTITION BY RANGE (recorded_at);

-- 创建月度分区
CREATE TABLE gps_tracks_2025_01 PARTITION OF gps_tracks
    FOR VALUES FROM ('2025-01-01') TO ('2025-02-01');
CREATE TABLE gps_tracks_2025_02 PARTITION OF gps_tracks
    FOR VALUES FROM ('2025-02-01') TO ('2025-03-01');
-- ... 继续创建

-- 每个分区独立创建索引
CREATE INDEX idx_tracks_2025_01_geom ON gps_tracks_2025_01 USING GIST(geom);
CREATE INDEX idx_tracks_2025_02_geom ON gps_tracks_2025_02 USING GIST(geom);

-- 自动创建分区(使用 pg_partman 扩展)
CREATE EXTENSION IF NOT EXISTS partman;
SELECT partman.create_parent(
    'public.gps_tracks',
    'recorded_at',
    'native',
    'monthly'
);

按列表分区(按区域)

CREATE TABLE pois (
    id SERIAL,
    name TEXT,
    region VARCHAR(20) NOT NULL,
    geom GEOMETRY(Point, 4326)
) PARTITION BY LIST (region);

CREATE TABLE pois_east PARTITION OF pois FOR VALUES IN ('华东', '华南');
CREATE TABLE pois_north PARTITION OF pois FOR VALUES IN ('华北', '东北');
CREATE TABLE pois_central PARTITION OF pois FOR VALUES IN ('华中', '西南', '西北');

按空间分区

-- 使用 H3 或 S2 空间分区索引
-- 方案:在表中添加 H3 索引列
CREATE EXTENSION IF NOT EXISTS h3;

ALTER TABLE pois ADD COLUMN h3_index h3index;
UPDATE pois SET h3_index = h3_geo_to_h3(ST_Y(geom), ST_X(geom), 7);

CREATE INDEX idx_pois_h3 ON pois(h3_index);

-- 查询时先用 H3 索引过滤
SELECT * FROM pois
WHERE h3_index IN (
    SELECT h3_geo_to_h3(39.9042, 116.4074, 7)  -- 目标 H3 格
    UNION
    SELECT h3_k_ring(h3_geo_to_h3(39.9042, 116.4074, 7), 1)  -- 相邻格
);

14.7 预计算

预计算距离矩阵

-- 预计算热点区域的距离矩阵
CREATE TABLE distance_matrix AS
SELECT
    a.id AS from_id,
    b.id AS to_id,
    ROUND(ST_Distance(a.geom::geography, b.geom::geography)) AS distance_m
FROM hotspots a, hotspots b
WHERE a.id < b.id;

CREATE INDEX idx_dm_from ON distance_matrix(from_id);
CREATE INDEX idx_dm_to ON distance_matrix(to_id);

-- 查询时直接使用预计算结果
SELECT from_id, to_id, distance_m
FROM distance_matrix
WHERE from_id = 1
ORDER BY distance_m
LIMIT 5;

物化视图

-- 创建空间物化视图
CREATE MATERIALIZED VIEW mv_store_coverage AS
SELECT
    store_id,
    ST_Union(ST_Buffer(geom::geography, 3000)::geometry) AS coverage_3km
FROM stores
GROUP BY store_id;

CREATE UNIQUE INDEX idx_mv_store_coverage ON mv_store_coverage(store_id);
CREATE INDEX idx_mv_store_coverage_geom ON mv_store_coverage USING GIST(coverage_3km);

-- 定期刷新
REFRESH MATERIALIZED VIEW CONCURRENTLY mv_store_coverage;

14.8 数据库配置优化

关键参数

-- 查看当前配置
SHOW shared_buffers;          -- 建议: 总内存的 25%
SHOW effective_cache_size;    -- 建议: 总内存的 50-75%
SHOW work_mem;                -- 建议: 256MB-1GB (排序/哈希操作)
SHOW maintenance_work_mem;    -- 建议: 1-2GB (VACUUM/CREATE INDEX)
SHOW random_page_cost;        -- SSD 设为 1.1
SHOW effective_io_concurrency; -- SSD 设为 200

-- 空间查询优化建议
SET work_mem = '512MB';                -- 复杂空间连接需要更多内存
SET enable_seqscan = off;              -- 仅调试时使用,强制使用索引
SET random_page_cost = 1.1;            -- SSD 存储

统计信息优化

-- 增加几何列的统计精度
ALTER TABLE stores ALTER COLUMN geom SET STATISTICS 1000;
ANALYZE stores;

-- 查看统计信息
SELECT
    attname,
    n_distinct,
    most_common_vals,
    histogram_bounds
FROM pg_stats
WHERE tablename = 'stores' AND attname = 'geom';

14.9 并行查询

-- 启用并行查询
SET max_parallel_workers_per_gather = 4;
SET parallel_tuple_cost = 0.01;
SET parallel_setup_cost = 100;

-- 查看查询计划是否使用并行
EXPLAIN ANALYZE
SELECT count(*) FROM pois
WHERE geom && ST_MakeEnvelope(116, 39, 117, 40, 4326);

-- 并行创建索引
CREATE INDEX CONCURRENTLY idx_pois_geom ON pois USING GIST(geom);

14.10 监控与诊断

空间查询监控

-- 查看活跃的空间查询
SELECT pid, query, state, wait_event_type, now() - query_start AS duration
FROM pg_stat_activity
WHERE query ILIKE '%st_%'
  AND state = 'active';

-- 终止长时间运行的空间查询
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE query ILIKE '%st_%'
  AND now() - query_start > INTERVAL '5 minutes';

-- 索引使用统计
SELECT
    schemaname, tablename, indexname,
    idx_scan, idx_tup_read, idx_tup_fetch,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
WHERE idx_scan > 0
ORDER BY idx_tup_read DESC;

表膨胀检查

-- 检查表膨胀
SELECT
    schemaname, tablename,
    pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS total_size,
    n_dead_tup,
    n_live_tup,
    ROUND(n_dead_tup::NUMERIC / GREATEST(n_live_tup, 1) * 100, 1) AS dead_pct
FROM pg_stat_user_tables
WHERE n_dead_tup > 10000
ORDER BY n_dead_tup DESC;

-- 重建膨胀的索引
REINDEX INDEX CONCURRENTLY idx_stores_geom;

14.11 性能基准测试

-- 创建基准测试函数
CREATE OR REPLACE FUNCTION benchmark_spatial_query(
    query_text TEXT,
    iterations INTEGER DEFAULT 10
) RETURNS TABLE(
    avg_time NUMERIC,
    min_time NUMERIC,
    max_time NUMERIC,
    stddev_time NUMERIC
) AS $$
DECLARE
    times NUMERIC[];
    start_time TIMESTAMPTZ;
    elapsed NUMERIC;
BEGIN
    FOR i IN 1..iterations LOOP
        start_time := clock_timestamp();
        EXECUTE query_text;
        elapsed := EXTRACT(EPOCH FROM (clock_timestamp() - start_time)) * 1000;
        times := array_append(times, elapsed);
    END LOOP;

    RETURN QUERY
    SELECT
        ROUND(avg(t), 2),
        ROUND(min(t), 2),
        ROUND(max(t), 2),
        ROUND(stddev(t), 2)
    FROM unnest(times) AS t;
END;
$$ LANGUAGE plpgsql;

-- 使用
SELECT * FROM benchmark_spatial_query(
    $$SELECT count(*) FROM pois WHERE geom && ST_MakeEnvelope(116, 39, 117, 40, 4326)$$,
    20
);

14.12 本章小结

优化层次关键措施预期提升
数据简化ST_Simplify, 降低精度2-10x
空间索引GiST/SP-GiST/BRIN10-100x
两步过滤&& 粗过滤 + 精确判断2-5x
表分区按时间/区域分区2-10x
预计算物化视图、距离矩阵10-100x
配置调优shared_buffers, work_mem1.5-3x

扩展阅读