强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

OpenCV 计算机视觉完全教程 / 第 12 章 — 视频处理

第 12 章 — 视频处理

12.1 视频读取

import cv2

# Open a video file
cap = cv2.VideoCapture("video.mp4")

# Or open a camera (0 = default camera)
# cap = cv2.VideoCapture(0)

# Fail fast if the source could not be opened
if not cap.isOpened():
    print("无法打开视频")
    exit()

# Query stream properties (some sources report 0 / unreliable values)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
duration = total_frames / fps if fps > 0 else 0  # guard fps == 0

print(f"分辨率: {width}×{height}")
print(f"帧率: {fps:.1f} FPS")
print(f"总帧数: {total_frames}")
print(f"时长: {duration:.1f} 秒")

# Read frame by frame until the stream ends
while True:
    ret, frame = cap.read()
    if not ret:  # end of file or read error
        break

    # Per-frame processing: convert to grayscale and display the result.
    # Fix: the original computed `gray` but never used it.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cv2.imshow("视频", gray)

    if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to quit
        break

cap.release()
cv2.destroyAllWindows()

视频属性常量

| 属性 | 常量 | 说明 |
| --- | --- | --- |
| 帧率 | CAP_PROP_FPS | 每秒帧数 |
| 宽度 | CAP_PROP_FRAME_WIDTH | 帧宽度 |
| 高度 | CAP_PROP_FRAME_HEIGHT | 帧高度 |
| 总帧数 | CAP_PROP_FRAME_COUNT | 总帧数 |
| 当前帧 | CAP_PROP_POS_FRAMES | 当前位置 |
| 时间戳 | CAP_PROP_POS_MSEC | 当前时间(ms) |
| 编解码器 | CAP_PROP_FOURCC | 4 字符编码 |

12.2 视频写入

import cv2
import numpy as np

cap = cv2.VideoCapture(0)  # camera

# Fix: fail fast if the camera cannot be opened (consistent with 12.1);
# otherwise cap.get() returns zeros and the writer is created with a
# bogus 0x0 frame size.
if not cap.isOpened():
    print("无法打开视频")
    exit()

# Camera parameters; some cameras report 0 FPS, so fall back to 30
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create the VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec
# Common choices: 'mp4v', 'XVID', 'MJPG', 'H264'
out = cv2.VideoWriter("output.mp4", fourcc, fps, (w, h))

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Example per-frame processing: gray round-trip
    processed = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    processed = cv2.cvtColor(processed, cv2.COLOR_GRAY2BGR)  # writer expects 3 channels

    out.write(processed)  # write the frame
    cv2.imshow("录制中...", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()

常用编解码器

| 编解码器 | FourCC | 后缀 | 说明 |
| --- | --- | --- | --- |
| H.264 | avc1 / H264 | .mp4 | 压缩率高,兼容性好 |
| H.265 | HEVC | .mp4 | 更高压缩率 |
| MJPEG | MJPG | .avi | 无损/低压缩 |
| XVID | XVID | .avi | MPEG-4 |
| VP9 | VP09 | .webm | 开源 |

12.3 帧差分运动检测

"""
motion_detector.py — 基于帧差分的运动检测
"""
import cv2
import numpy as np

def detect_motion():
    """Frame-differencing motion detector on the default camera.

    Draws a green box around every moving region larger than 500 px^2
    and shows the binary difference mask. Press 'q' to quit.
    """
    cap = cv2.VideoCapture(0)

    # Read the first frame as the background reference.
    # Fix: the original ignored `ret`; cvtColor(None) crashes when the
    # camera cannot deliver a frame.
    ret, prev_frame = cap.read()
    if not ret:
        cap.release()
        return

    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    prev_gray = cv2.GaussianBlur(prev_gray, (21, 21), 0)  # suppress sensor noise

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (21, 21), 0)

        # Absolute per-pixel difference against the previous frame
        delta = cv2.absdiff(prev_gray, gray)
        _, thresh = cv2.threshold(delta, 25, 255, cv2.THRESH_BINARY)

        # Morphology: dilate to merge nearby blobs, erode to trim noise
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
        thresh = cv2.dilate(thresh, kernel, iterations=2)
        thresh = cv2.erode(thresh, kernel, iterations=1)

        # Outline each moving region
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                        cv2.CHAIN_APPROX_SIMPLE)

        result = frame.copy()
        motion_count = 0
        for cnt in contours:
            area = cv2.contourArea(cnt)
            if area < 500:  # ignore tiny flickers
                continue
            motion_count += 1
            x, y, w, h = cv2.boundingRect(cnt)
            cv2.rectangle(result, (x, y), (x+w, y+h), (0, 255, 0), 2)

        cv2.putText(result, f"Motions: {motion_count}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.imshow("运动检测", result)
        cv2.imshow("差分掩码", thresh)

        prev_gray = gray  # current frame becomes the next reference
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

12.4 背景减除

import cv2

cap = cv2.VideoCapture("video.mp4")

# Method 1: MOG2 (Gaussian mixture model)
bg_subtractor = cv2.createBackgroundSubtractorMOG2(
    history=500,          # number of history frames
    varThreshold=16,      # variance threshold
    detectShadows=True    # mark shadows (gray) in the mask
)

# Method 2: KNN
# bg_subtractor = cv2.createBackgroundSubtractorKNN(
#     history=500, dist2Threshold=400, detectShadows=True
# )

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Apply background subtraction -> foreground mask
    fg_mask = bg_subtractor.apply(frame)

    # Current background-model estimate.
    # Fix: the original fetched it but never displayed it, and it can be
    # None before the model has seen enough frames.
    bg_image = bg_subtractor.getBackgroundImage()
    if bg_image is not None:
        cv2.imshow("背景模型", bg_image)

    cv2.imshow("前景掩码", fg_mask)
    cv2.imshow("原始", frame)

    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

背景减除方法对比

| 方法 | 创建函数 | 适用 |
| --- | --- | --- |
| MOG2 | createBackgroundSubtractorMOG2 | 通用 |
| KNN | createBackgroundSubtractorKNN | 复杂场景 |
| GMG | bgsegm.createBackgroundSubtractorGMG | 一般静态背景 |

12.5 光流估计

12.5.1 稀疏光流(Lucas-Kanade)

import cv2
import numpy as np

cap = cv2.VideoCapture("video.mp4")

# Lucas-Kanade parameters
lk_params = dict(
    winSize=(15, 15),
    maxLevel=2,
    criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)
)

# First frame
ret, old_frame = cap.read()
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

# Initial feature points (can be None on a featureless frame)
p0 = cv2.goodFeaturesToTrack(old_gray, maxCorners=100,
                              qualityLevel=0.3, minDistance=7)

# One random color per track
colors = np.random.randint(0, 255, (100, 3))

mask = np.zeros_like(old_frame)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Track the feature points into the new frame.
    # Fix: guard p0 — goodFeaturesToTrack may return None, which made
    # calcOpticalFlowPyrLK and len(p0) crash in the original.
    if p0 is not None:
        p1, st, err = cv2.calcOpticalFlowPyrLK(
            old_gray, gray, p0, None, **lk_params
        )

        if p1 is not None:
            # keep only the successfully tracked points
            good_new = p1[st == 1]
            good_old = p0[st == 1]

            # draw the trails
            for i, (new, old) in enumerate(zip(good_new, good_old)):
                a, b = new.ravel()
                c, d = old.ravel()
                mask = cv2.line(mask, (int(a), int(b)), (int(c), int(d)),
                               colors[i % 100].tolist(), 2)
                frame = cv2.circle(frame, (int(a), int(b)), 3,
                                  colors[i % 100].tolist(), -1)

            # tracked points feed the next iteration
            p0 = good_new.reshape(-1, 1, 2)

    result = cv2.add(frame, mask)
    cv2.imshow("光流追踪", result)

    old_gray = gray
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

    # Re-detect when too few points survive (also covers p0 is None)
    if p0 is None or len(p0) < 20:
        p0 = cv2.goodFeaturesToTrack(gray, maxCorners=100,
                                      qualityLevel=0.3, minDistance=7)
        mask = np.zeros_like(old_frame)

cap.release()
cv2.destroyAllWindows()

12.5.2 稠密光流(Farneback)

import cv2
import numpy as np

cap = cv2.VideoCapture("video.mp4")
ret, old_frame = cap.read()
# Fix: the original ignored `ret`; cvtColor/zeros_like crash on None
# when the file is missing or unreadable (consistent with 12.3).
if not ret:
    print("无法打开视频")
    exit()

old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
hsv = np.zeros_like(old_frame)
hsv[..., 1] = 255  # full saturation

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Dense optical flow: one 2-D motion vector per pixel
    flow = cv2.calcOpticalFlowFarneback(
        old_gray, gray, None,
        pyr_scale=0.5,  # pyramid scale
        levels=3,        # pyramid levels
        winsize=15,      # averaging window size
        iterations=3,    # iterations per level
        poly_n=5,        # polynomial expansion neighborhood
        poly_sigma=1.2,  # Gaussian sigma for the expansion
        flags=0
    )

    # Cartesian -> polar (magnitude + direction)
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])

    # HSV visualization (H = direction, V = magnitude)
    hsv[..., 0] = angle * 180 / np.pi / 2  # radians -> OpenCV hue range 0-179
    hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
    rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    cv2.imshow("稠密光流", rgb)

    old_gray = gray
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

12.6 目标追踪器

import cv2

cap = cv2.VideoCapture(0)

# Pick a tracker
# OpenCV 4.x recommended: CSRT, KCF
tracker = cv2.TrackerCSRT_create()
# Alternatives:
# tracker = cv2.TrackerKCF_create()
# tracker = cv2.legacy.TrackerMOSSE_create()

# First frame: let the user draw the ROI to track.
# Fix: the original ignored `ret` (selectROI crashes on None) and left
# the selector window open for the whole session.
ret, frame = cap.read()
if not ret:
    print("无法打开视频")
    cap.release()
    exit()

bbox = cv2.selectROI("选择目标", frame, False)
cv2.destroyWindow("选择目标")  # close the selector before the tracking loop
tracker.init(frame, bbox)

while True:
    ret, frame = cap.read()
    if not ret:
        break

    # Advance the tracker to the current frame
    success, bbox = tracker.update(frame)

    if success:
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.putText(frame, "Tracking", (x, y-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    else:
        cv2.putText(frame, "Lost!", (100, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    cv2.imshow("追踪", frame)
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

追踪器对比

| 追踪器 | 速度 | 精度 | 鲁棒性 | 说明 |
| --- | --- | --- | --- | --- |
| CSRT | ★★★☆☆ | ★★★★★ | ★★★★ | 最佳精度(推荐) |
| KCF | ★★★★☆ | ★★★★ | ★★★ | 平衡选择 |
| MOSSE | ★★★★★ | ★★★ | ★★ | 最快 |
| MedianFlow | ★★★★☆ | ★★★ | ★★ | 匀速运动 |

12.7 视频处理最佳实践

"""
video_utils.py — 视频处理工具集
"""
import cv2
import time

def get_video_info(path):
    """Return basic metadata for the video at *path*.

    Keys: width, height, fps, total_frames, codec (FourCC packed as int)
    and duration_sec. Fix: guard against fps == 0 (common for broken
    files or live streams), which made the original divide by zero.
    """
    cap = cv2.VideoCapture(path)
    info = {
        "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
        "fps": cap.get(cv2.CAP_PROP_FPS),
        "total_frames": int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
        "codec": int(cap.get(cv2.CAP_PROP_FOURCC)),
    }
    info["duration_sec"] = (
        info["total_frames"] / info["fps"] if info["fps"] > 0 else 0.0
    )
    cap.release()
    return info

def process_video(input_path, output_path, process_func):
    """Generic frame-by-frame video processing pipeline.

    Reads *input_path*, applies ``process_func(frame) -> frame`` to every
    frame, and writes the result to *output_path* (mp4v codec, same
    resolution/fps). Prints a progress report every 100 frames.

    Fixes vs. the original: the capture and writer are released even if
    ``process_func`` raises, and the progress report no longer divides by
    zero when the frame count is unknown (total == 0).
    """
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back when fps is unknown
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))

    frame_idx = 0
    start_time = time.time()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            processed = process_func(frame)
            out.write(processed)

            frame_idx += 1
            # total == 0 means the source did not report a frame count
            if frame_idx % 100 == 0 and total > 0:
                elapsed = time.time() - start_time
                speed = frame_idx / elapsed
                eta = (total - frame_idx) / speed
                print(f"进度: {frame_idx}/{total} ({frame_idx/total*100:.1f}%) "
                      f"速度: {speed:.1f} FPS ETA: {eta:.0f}s")
    finally:
        cap.release()
        out.release()
    print(f"处理完成: {output_path}")

12.8 扩展阅读

| 资源 | 链接 | 说明 |
| --- | --- | --- |
| OpenCV 视频文档 | docs.opencv.org/4.x/dd/d43/tutorial_py_video_display | 视频 I/O |
| 光流教程 | docs.opencv.org/4.x/d4/dee/tutorial_optical_flow | 光流详解 |
| 下一章 | 第 13 章 — DNN 深度学习模块 | 模型加载/推理 |

本章小结: 掌握了视频读取与写入、帧差分运动检测、背景减除、光流估计和目标追踪等核心视频处理技术。