强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

Hunspell 拼写检查完全教程 / 第 07 章:编程接口

第 07 章:编程接口

7.1 概述

Hunspell 提供了多种编程语言的绑定,核心是 C/C++ 库 libhunspell。本章涵盖各主流语言的集成方案。

| 语言 | 绑定库 | 安装方式 | 成熟度 |
| --- | --- | --- | --- |
| C/C++ | libhunspell | 系统包 | ★★★★★ |
| Python | pyhunspell / pyhunspell2 / pyspellchecker | pip | ★★★★ |
| Node.js | nspell / nodehun / hunspell-asm | npm | ★★★★ |
| Go | gohunspell / go-spelling | go get | ★★★ |
| Rust | hunspell-rs / hunspell-sys | cargo | ★★★ |
| Java | hunspell-bridj / jhunspell | maven | ★★ |
| C#/.NET | Hunspell4Net / WeCantSpell.Hunspell | nuget | ★★★ |
| PHP | pspell (PHP 扩展) | pecl | ★★★ |

7.2 C API

7.2.1 头文件

/* hunspell.h — 核心 API 声明 */
#include <hunspell/hunspell.h>

7.2.2 核心 API 函数

| 函数 | 说明 | 参数 |
| --- | --- | --- |
| Hunspell_create(affpath, dpath) | 创建词典句柄 | aff 和 dic 文件路径 |
| Hunspell_destroy(pHunspell) | 释放句柄 | 句柄指针 |
| Hunspell_spell(pHunspell, word) | 检查单词是否正确 | 返回 1=正确, 0=错误 |
| Hunspell_suggest(pHunspell, slst, word) | 获取建议列表 | 建议数组指针、单词 |
| Hunspell_add(pHunspell, word) | 添加单词到会话词典 | 单词 |
| Hunspell_add_with_affix(pHunspell, word, model) | 以词根为模板添加 | 单词、模型词 |
| Hunspell_remove(pHunspell, word) | 从词典中移除 | 单词 |
| Hunspell_analyze(pHunspell, slst, word) | 形态学分析 | 结果数组、单词 |
| Hunspell_stem(pHunspell, slst, word) | 词干提取 | 结果数组、单词 |
| Hunspell_generate(pHunspell, slst, word, word2) | 生成词形 | 结果数组、词、模型 |
| Hunspell_free_list(pHunspell, slst, n) | 释放建议/分析列表 | 数组指针、数量 |

7.2.3 完整 C 示例

/* spellcheck_example.c — Hunspell C API 完整示例 */
#include <hunspell/hunspell.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_WORD_LEN 256
#define MAX_SUGGESTIONS 20

/*
 * Demo driver for the Hunspell C API: creates a handle, then exercises
 * spell checking, suggestions, stemming, morphological analysis and
 * run-time word addition, printing every result to stdout.
 *
 * When at least two arguments are given, argv[1] and argv[2] replace the
 * default en_US .aff/.dic paths. Returns 1 if Hunspell_create() yields a
 * NULL handle, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    const char *aff_path = "/usr/share/hunspell/en_US.aff";
    const char *dic_path = "/usr/share/hunspell/en_US.dic";

    /* Dictionary paths may be overridden on the command line */
    if (argc >= 3) {
        aff_path = argv[1];
        dic_path = argv[2];
    }

    /* 1. Create the Hunspell handle */
    Hunhandle *handle = Hunspell_create(aff_path, dic_path);
    if (!handle) {
        fprintf(stderr, "错误:无法加载词典 %s / %s\n", aff_path, dic_path);
        return 1;
    }
    printf("词典加载成功: %s\n", dic_path);

    /* 2. Spell check a NULL-terminated list of sample words */
    const char *test_words[] = {
        "hello", "world", "helo", "programming",
        "progrmming", "correct", "corect", NULL
    };

    printf("\n=== 拼写检查 ===\n");
    for (int i = 0; test_words[i]; i++) {
        int correct = Hunspell_spell(handle, test_words[i]);
        printf("  %-15s → %s\n", test_words[i], correct ? "✓ 正确" : "✗ 错误");
    }

    /* 3. Suggestions: print at most five per misspelled word */
    printf("\n=== 拼写建议 ===\n");
    char **sug_list = NULL;
    const char *misspelled[] = {"helo", "progrmming", "wrold", NULL};

    for (int i = 0; misspelled[i]; i++) {
        int count = Hunspell_suggest(handle, &sug_list, misspelled[i]);
        printf("  '%s' 的建议 (%d 个):", misspelled[i], count);
        for (int j = 0; j < count && j < 5; j++) {
            printf(" %s", sug_list[j]);
        }
        printf("\n");
        /* The list is handed back to the library after every query */
        Hunspell_free_list(handle, &sug_list, count);
    }

    /* 4. Morphology: stems first, then the analysis strings */
    printf("\n=== 形态学分析 ===\n");
    char **stem_list = NULL;
    const char *analyze_words[] = {"running", "wolves", "unhappiness", NULL};

    for (int i = 0; analyze_words[i]; i++) {
        /* Stemming */
        int stem_count = Hunspell_stem(handle, &stem_list, analyze_words[i]);
        printf("  '%s' → 词干:", analyze_words[i]);
        for (int j = 0; j < stem_count; j++) {
            printf(" %s", stem_list[j]);
        }
        printf("\n");
        Hunspell_free_list(handle, &stem_list, stem_count);

        /* Morphological analysis; nothing is printed or freed when the count is 0 */
        char **morph_list = NULL;
        int morph_count = Hunspell_analyze(handle, &morph_list, analyze_words[i]);
        if (morph_count > 0) {
            printf("  '%s' → 形态:", analyze_words[i]);
            for (int j = 0; j < morph_count; j++) {
                printf(" [%s]", morph_list[j]);
            }
            printf("\n");
            Hunspell_free_list(handle, &morph_list, morph_count);
        }
    }

    /* 5. Add words to the run-time dictionary, then re-check them */
    printf("\n=== 添加单词 ===\n");
    Hunspell_add(handle, "Hunspell");
    Hunspell_add(handle, "Nemeth");

    int check1 = Hunspell_spell(handle, "Hunspell");
    int check2 = Hunspell_spell(handle, "Nemeth");
    printf("  添加后 'Hunspell': %s\n", check1 ? "✓" : "✗");
    printf("  添加后 'Nemeth': %s\n", check2 ? "✓" : "✗");

    /* 6. Clean up */
    Hunspell_destroy(handle);
    printf("\n资源已释放\n");

    return 0;
}

编译运行:

gcc spellcheck_example.c -o spellcheck_example $(pkg-config --cflags --libs hunspell)
./spellcheck_example

7.2.4 批量检查文本

/* spellcheck_text.c — 批量检查文本文件 */
#include <hunspell/hunspell.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAX_WORD 256

/* 从文本中提取下一个单词 */
/*
 * Read the next word from fp into word (NUL-terminated, at most
 * max_len - 1 characters; longer words are truncated but fully consumed).
 *
 * A word starts with an alphabetic character and continues through
 * letters, apostrophes and hyphens. Trailing apostrophes/hyphens are
 * dropped so that a closing quote or a dangling hyphen ('there', well-)
 * does not turn a correct word into a reported misspelling.
 *
 * Returns 0 when a word was stored, -2 when a newline was consumed while
 * skipping separators (so the caller can count lines), and -1 at end of
 * input or when the arguments cannot hold a word (NULL pointer or
 * max_len < 2).
 */
int next_word(FILE *fp, char *word, int max_len) {
    int c, i = 0;

    /* Need room for at least one character plus the terminator. */
    if (fp == NULL || word == NULL || max_len < 2) return -1;

    /* Skip separators; every newline is reported to the caller. */
    while ((c = fgetc(fp)) != EOF && !isalpha(c)) {
        if (c == '\n') return -2;
    }
    if (c == EOF) return -1;

    /* Collect the word; characters that do not fit are read and dropped. */
    word[i++] = (char)c;
    while ((c = fgetc(fp)) != EOF && (isalpha(c) || c == '\'' || c == '-')) {
        if (i < max_len - 1) word[i++] = (char)c;
    }

    /* Push the delimiter back so a following '\n' is still seen. */
    if (c != EOF) ungetc(c, fp);

    /* The first character is alphabetic, so at least one always remains. */
    while (i > 1 && (word[i - 1] == '\'' || word[i - 1] == '-')) i--;
    word[i] = '\0';

    return 0;
}

/*
 * Spell-check every word of a text file and print one line per
 * misspelling (line number, word, up to two suggestions), followed by a
 * summary with the total word count and error rate.
 *
 * Usage: argv[1] is the file to check; when argv[2] and argv[3] are both
 * present they replace the default en_US .aff/.dic paths.
 * Returns 1 on a usage error, a NULL Hunspell handle or an unreadable
 * file, 0 otherwise.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "用法: %s <文件> [词典路径]\n", argv[0]);
        return 1;
    }

    const char *aff = "/usr/share/hunspell/en_US.aff";
    const char *dic = "/usr/share/hunspell/en_US.dic";
    /* Both dictionary paths must be supplied for the override to apply */
    if (argc >= 4) {
        aff = argv[2];
        dic = argv[3];
    }

    Hunhandle *H = Hunspell_create(aff, dic);
    if (!H) { fprintf(stderr, "加载词典失败\n"); return 1; }

    FILE *fp = fopen(argv[1], "r");
    /* Release the dictionary handle before bailing out */
    if (!fp) { perror("打开文件失败"); Hunspell_destroy(H); return 1; }

    char word[MAX_WORD];
    int line = 1, total = 0, errors = 0;

    while (1) {
        int result = next_word(fp, word, MAX_WORD);
        if (result == -2) { line++; continue; }  /* newline marker from next_word */
        if (result == -1) break;                 /* end of input */

        total++;
        if (!Hunspell_spell(H, word)) {
            char **sugs = NULL;
            int n = Hunspell_suggest(H, &sugs, word);
            printf("行 %d: '%s'", line, word);
            /* Show at most the first two suggestions */
            if (n > 0) {
                printf(" → 建议: %s", sugs[0]);
                if (n > 1) printf(", %s", sugs[1]);
            }
            printf("\n");
            Hunspell_free_list(H, &sugs, n);
            errors++;
        }
    }

    /* Guard the percentage against an empty file (total == 0) */
    printf("\n总计: %d 词, %d 拼写错误 (%.1f%%)\n",
           total, errors, total > 0 ? 100.0 * errors / total : 0.0);

    fclose(fp);
    Hunspell_destroy(H);
    return 0;
}

7.3 Python

7.3.1 方案对比

| 包名 | 说明 | 推荐度 |
| --- | --- | --- |
| pyspellchecker | 纯 Python 实现,不依赖 Hunspell 二进制 | ★★★★★ |
| pyhunspell | libhunspell 的 Python 绑定 | ★★★ |
| pyhunspell2 | pyhunspell 的现代版本 | ★★★ |
| cyhunspell | Cython 绑定 | ★★ |
| hunspell-interface | 子进程包装 | ★★ |

7.3.2 pyspellchecker(推荐)

pip install pyspellchecker
#!/usr/bin/env python3
"""pyspellchecker 基本使用示例"""
from spellchecker import SpellChecker

# 1. Create a checker instance (no language argument is passed here)
spell = SpellChecker()

# 2. Best-guess corrections and word-frequency lookups
print(spell.correction("helo"))      # → "hello"
print(spell.correction("wrold"))     # → "world"
print(spell["hello"])                # → word frequency; presumably non-zero for a dictionary word
print(spell["helo"])                 # → 0 (0 means the word is not in the dictionary)

# 3. Candidate corrections (sets; exact contents depend on the bundled dictionary)
print(spell.candidates("helo"))      # → e.g. {'hello', 'helot', ...}
print(spell.candidates("wrold"))     # → e.g. {'world', 'wold', 'would', ...}

# 4. Check a whole text: split on whitespace and keep the unknown words
text = "This sentense has a few typose in it"
misspelled = spell.unknown(text.split())
print(f"拼写错误: {misspelled}")     # → {'sentense', 'typose'}

# 5. Print the best correction and all candidates for each unknown word
for word in misspelled:
    correction = spell.correction(word)
    candidates = spell.candidates(word)
    print(f"  '{word}' → '{correction}' (候选: {candidates})")

7.3.3 pyspellchecker 高级功能

#!/usr/bin/env python3
"""pyspellchecker 高级功能"""
from spellchecker import SpellChecker
import re

# ========== Multi-language support ==========
# Built-in languages listed by the tutorial: en, es, fr, de, pt, ru, ar, lv
# NOTE(review): the bundled language set varies by release — confirm.
spell_en = SpellChecker(language='en')
spell_fr = SpellChecker(language='fr')
spell_de = SpellChecker(language='de')

# ========== Custom dictionaries ==========
spell = SpellChecker()

# Load extra words from a text file.
# NOTE(review): a Hunspell .dic file has a count header and word/FLAGS
# entries; confirm load_text_file handles that format as intended.
spell.word_frequency.load_text_file('.hunspell/project.dic')

# Or load words from an in-memory collection
custom_words = {'API', 'JSON', 'HTTP', 'GraphQL', 'Docker', 'Kubernetes'}
spell.word_frequency.load_words(custom_words)

# Add single words
spell.word_frequency.add('TypeScript')
spell.word_frequency.add('Golang')

# Temporary ignore list: this demo uses no dedicated "ignore" call;
# adding the word to this instance (as above) serves the same purpose.

# ========== Word-frequency lookups ==========
# Indexing the checker returns the stored frequency of a word
print(spell['the'])       # → expected to be a high-frequency word
print(spell['hello'])     # → expected to be a mid-frequency word
print(spell['zyzzyva'])   # → expected to be a low-frequency word

# ========== 文本检查封装 ==========
def spellcheck_text(text: str, spell: SpellChecker) -> list[dict]:
    """Spell-check *text* and describe every unknown word of 3+ characters.

    Args:
        text: Text to scan. Words are runs of ASCII letters, apostrophes
            and hyphens delimited by word boundaries.
        spell: Checker providing ``unknown()``, ``correction()`` and
            ``candidates()``.

    Returns:
        One dict per unknown word, in order of appearance, with the keys
        ``word``, ``position`` (character offset of that occurrence in
        *text*), ``correction`` (whatever the checker returns, possibly
        ``None``) and ``candidates`` (a list of at most five entries).
    """
    results = []
    # finditer supplies the offset of each match directly. Searching for
    # the word text afterwards could land on an earlier non-word
    # occurrence (e.g. the "abc" inside "abc1abc").
    for match in re.finditer(r"\b[a-zA-Z'-]+\b", text):
        word = match.group()

        if len(word) < 3:  # skip short words
            continue
        if not spell.unknown([word]):
            continue

        # The checker may return None instead of an empty collection when
        # it has no candidates; treat that as "no candidates".
        candidates = spell.candidates(word) or ()
        results.append({
            'word': word,
            'position': match.start(),
            'correction': spell.correction(word),
            'candidates': list(candidates)[:5],
        })

    return results

# 使用
text = "This documnet explians the basc usage of pyspellcheker"
results = spellcheck_text(text, spell)
for r in results:
    print(f"  位置 {r['position']}: '{r['word']}' → '{r['correction']}'")

7.3.4 pyhunspell(libhunspell 绑定)

# 安装(需要系统安装 libhunspell-dev)
pip install pyhunspell
# 或
pip install pyhunspell2
#!/usr/bin/env python3
"""pyhunspell 使用示例"""
import hunspell

# 1. Create the Hunspell object
# Arguments passed below: path to the .aff file, then path to the .dic file
hobj = hunspell.HunSpell(
    '/usr/share/hunspell/en_US.aff',
    '/usr/share/hunspell/en_US.dic'
)

# 2. Spell check
print(hobj.spell('hello'))     # → True
print(hobj.spell('helo'))      # → False

# 3. Suggestions (exact list depends on the dictionary)
suggestions = hobj.suggest('helo')
print(f"建议: {suggestions}")   # → e.g. ['hello', 'Helo', 'helot', 'help']

# 4. Add a word, then re-check it
hobj.add('Hunspell')
print(hobj.spell('Hunspell'))  # → True

# 5. Stemming
# NOTE(review): some binding versions may return bytes rather than str — confirm.
stems = hobj.stem('running')
print(f"词干: {stems}")        # → e.g. ['run']

# 6. Morphological analysis
# NOTE(review): the output format shown by the tutorial ('un+happi+ness')
# is unverified; confirm against real output.
analysis = hobj.analyze('unhappiness')
print(f"形态: {analysis}")

# 7. Generate a word form from 'happy' using 'unhappy' as the second argument
generated = hobj.generate('happy', 'unhappy')
print(f"生成: {generated}")

7.3.5 子进程方案(无需编译绑定)

#!/usr/bin/env python3
"""使用 subprocess 调用 hunspell 命令行(无需绑定库)"""
import subprocess
import re

class HunspellChecker:
    """Thin wrapper around the ``hunspell`` command-line tool.

    Every public method spawns one ``hunspell`` process and feeds it text
    on stdin, so no compiled binding is needed.
    """

    def __init__(self, dictionary: str = "en_US", personal_dict: str = None):
        # dictionary is passed to ``-d``; personal_dict (if set) to ``-p``.
        self.dictionary = dictionary
        self.personal_dict = personal_dict

    def _run(self, args: list[str], input_text: str = "") -> str:
        """Run ``hunspell`` with *args*, send *input_text* on stdin, return stdout."""
        cmd = ["hunspell"] + args
        if self.personal_dict:
            cmd.extend(["-p", self.personal_dict])
        result = subprocess.run(
            cmd, input=input_text,
            capture_output=True, text=True
        )
        return result.stdout

    @staticmethod
    def _as_pipe_text(text: str) -> str:
        """Prefix every line with ``^`` so pipe mode treats it as text.

        In ``-a`` mode a line starting with ``*``, ``&``, ``@``, ``+``,
        ``-``, ``~``, ``#`` or ``!`` is a command (``*word`` would add to
        the personal dictionary, for example); ``^`` disables that.
        """
        return "\n".join("^" + line for line in text.split("\n"))

    def check_word(self, word: str) -> bool:
        """Return True when hunspell reports no misspelling in *word*."""
        output = self._run(["-d", self.dictionary, "-l"], word)
        # -l lists only the misspelled tokens, so empty output means
        # "all good". A substring test would miss cases where hunspell
        # reports just a part of the input (e.g. "cd" for "ab-cd").
        return not output.strip()

    def suggest(self, word: str, limit: int = 5) -> list[str]:
        """Return at most *limit* suggestions for *word* (empty if none)."""
        # hunspell has no "max suggestions" switch: -L means "print lines
        # with misspelled words", and a number after it would be taken as
        # an input file name. The limit is applied here instead.
        output = self._run(["-a", "-d", self.dictionary], self._as_pipe_text(word))
        for line in output.strip().split("\n"):
            if line.startswith("&"):
                # "& <word> <count> <offset>: s1, s2, ..."
                match = re.match(r"& \S+ \d+ \d+: (.+)", line)
                if match:
                    found = [s.strip() for s in match.group(1).split(",")]
                    return found[:max(limit, 0)]
        return []

    def find_misspellings(self, text: str) -> list[dict]:
        """Return a ``{'word', 'suggestions'}`` dict for each misspelling in *text*."""
        output = self._run(["-a", "-d", self.dictionary], self._as_pipe_text(text))

        errors = []
        for line in output.strip().split("\n"):
            if line.startswith("&"):
                # Misspelling with suggestions
                match = re.match(r"& (\S+) \d+ \d+: (.+)", line)
                if match:
                    errors.append({
                        'word': match.group(1),
                        'suggestions': [s.strip() for s in match.group(2).split(",")]
                    })
            elif line.startswith("#"):
                # Misspelling without any suggestion
                match = re.match(r"# (\S+) \d+", line)
                if match:
                    errors.append({
                        'word': match.group(1),
                        'suggestions': []
                    })
        return errors

    def get_stem(self, word: str) -> str:
        """Return the stem parsed from ``hunspell -s`` output, or *word* itself.

        NOTE(review): this looks for a "->" separator in the output;
        confirm that matches the format printed by the installed version.
        """
        output = self._run(["-s", "-d", self.dictionary], word)
        for line in output.strip().split("\n"):
            if "->" in line:
                return line.split("->")[1].strip()
        return word

# 使用示例
checker = HunspellChecker("en_US")
print(checker.check_word("hello"))           # True
print(checker.check_word("helo"))            # False
print(checker.suggest("helo"))               # ['hello', 'Helo', ...]
print(checker.find_misspellings("This sentense has typose"))
# [{'word': 'sentense', 'suggestions': ['sentence', ...]}, ...]

7.4 Node.js

7.4.1 nspell(推荐)

npm install nspell
// nspell 基本使用
const nspell = require('nspell');
const fs = require('fs');

// Load the dictionary files (passed to nspell as Buffers)
const aff = fs.readFileSync('/usr/share/hunspell/en_US.aff');
const dic = fs.readFileSync('/usr/share/hunspell/en_US.dic');
const spell = nspell(aff, dic);

// Spell check
console.log(spell.correct('hello'));    // true
console.log(spell.correct('helo'));     // false

// Suggestions (exact list depends on the dictionary)
console.log(spell.suggest('helo'));
// e.g. ['hello', 'Helo', 'helot', 'help']

// nspell has no stemming API (stem() is not part of its interface, so
// calling it throws a TypeError). spelling() is the closest query: it
// reports whether the word is correct, forbidden or flagged with a warning.
console.log(spell.spelling('running')); // { correct: true, forbidden: false, warn: false }

// Add a word to the in-memory personal dictionary
spell.add('Hunspell');
console.log(spell.correct('Hunspell')); // true

// Add a word modelled on an existing entry: the second argument names a
// word already in the dictionary whose affix rules the new word inherits.
spell.add('API', 'word');
spell.add('APIs', 'word');

// Remove a word
spell.remove('Hunspell');
console.log(spell.correct('Hunspell')); // false

7.4.2 Express.js 中间件

// spellcheck_middleware.js — Express.js 拼写检查中间件
const nspell = require('nspell');
const fs = require('fs');
const path = require('path');

/**
 * Holds one nspell checker per language and spell-checks free text.
 */
class SpellCheckerService {
  /**
   * @param {Object<string, string>} dicts Language code → dictionary path
   *   without extension (".aff" and ".dic" are appended).
   * @param {?string} customDictPath Optional word list, one word per line;
   *   blank lines and lines starting with "#" are ignored. The words are
   *   added to every loaded checker.
   */
  constructor(dicts, customDictPath = null) {
    // Load one checker per configured language
    this.checkers = {};
    for (const [lang, dictPath] of Object.entries(dicts)) {
      const aff = fs.readFileSync(`${dictPath}.aff`);
      const dic = fs.readFileSync(`${dictPath}.dic`);
      this.checkers[lang] = nspell(aff, dic);
    }

    // Load the custom word list
    if (customDictPath && fs.existsSync(customDictPath)) {
      // Trim before filtering: otherwise whitespace-only lines (including
      // the "\r" left by CRLF files) pass the filter and add empty words,
      // and indented "#" comments are added as words.
      const customWords = fs.readFileSync(customDictPath, 'utf-8')
        .split('\n')
        .map(line => line.trim())
        .filter(line => line && !line.startsWith('#'));
      for (const checker of Object.values(this.checkers)) {
        customWords.forEach(word => checker.add(word));
      }
    }
  }

  /**
   * Return the misspelled words of `text`, each with up to five suggestions.
   * Words shorter than three characters are skipped, and each word is
   * checked once (case-insensitively; the first spelling seen is used).
   *
   * @param {string} text Text to check.
   * @param {string} [lang='en'] Language code of a loaded checker.
   * @returns {Array<{word: string, suggestions: string[]}>}
   * @throws {Error} When no checker is loaded for `lang`.
   * @throws {TypeError} When `text` is not a string.
   */
  checkText(text, lang = 'en') {
    // Own-property lookup only: lang may come from a request body, and a
    // name such as "constructor" would otherwise resolve on Object.prototype.
    const checker = Object.prototype.hasOwnProperty.call(this.checkers, lang)
      ? this.checkers[lang]
      : undefined;
    if (!checker) throw new Error(`不支持的语言: ${lang}`);
    if (typeof text !== 'string') throw new TypeError('text must be a string');

    const words = text.match(/\b[a-zA-Z'-]+\b/g) || [];
    const errors = [];
    const seen = new Set();

    for (const word of words) {
      const key = word.toLowerCase();
      if (word.length < 3 || seen.has(key)) continue;
      seen.add(key);

      if (!checker.correct(word)) {
        errors.push({
          word,
          suggestions: checker.suggest(word).slice(0, 5)
        });
      }
    }
    return errors;
  }
}

// 初始化服务
const spellService = new SpellCheckerService(
  { en: '/usr/share/hunspell/en_US' },
  path.join(__dirname, '.hunspell', 'project.dic')
);

// Express 中间件
/**
 * Express middleware: spell-checks `req.body.text` (language taken from
 * `req.body.lang`, default "en") and stores the outcome on `req.spellcheck`
 * as `{ errors, count }`.
 *
 * `req.spellcheck` is always set. When the body carries no non-empty
 * string `text`, it is an empty result, so later handlers can read
 * `req.spellcheck.errors` without crashing.
 */
function spellcheckMiddleware(req, res, next) {
  req.spellcheck = { errors: [], count: 0 };

  const text = req.body ? req.body.text : undefined;
  if (typeof text === 'string' && text) {
    const lang = req.body.lang || 'en';
    const errors = spellService.checkText(text, lang);
    req.spellcheck = { errors, count: errors.length };
  }
  next();
}

// API 路由
const express = require('express');
const app = express();
app.use(express.json());

app.post('/api/spellcheck', spellcheckMiddleware, (req, res) => {
  res.json({
    errors: req.spellcheck.errors,
    count: req.spellcheck.count
  });
});

app.listen(3000, () => console.log('拼写检查服务运行在 :3000'));

7.4.3 Browserify / Webpack 打包

// browser_spell.js — 浏览器端拼写检查
// 使用 nspell 的浏览器兼容版本

import nspell from 'nspell';

/**
 * Fetch the .aff/.dic pair for `lang` from /dictionaries/<lang>/ and
 * build an nspell checker from them.
 *
 * The files are read as text and handed to nspell as strings, so no
 * Node-style `Buffer` global (absent in browsers unless polyfilled by the
 * bundler) is required.
 *
 * @param {string} [lang='en-US'] Directory and base name of the dictionary.
 * @returns {Promise<object>} The nspell instance.
 * @throws {Error} When either file cannot be fetched (non-2xx response).
 */
async function loadDictionary(lang = 'en-US') {
  const fetchText = async (extension) => {
    const url = `/dictionaries/${lang}/${lang}.${extension}`;
    const response = await fetch(url);
    // Without this check an HTML error page would be parsed as a dictionary.
    if (!response.ok) {
      throw new Error(`Failed to load ${url}: HTTP ${response.status}`);
    }
    return response.text();
  };

  // Load both files in parallel from a CDN or local server
  const [aff, dic] = await Promise.all([fetchText('aff'), fetchText('dic')]);

  return nspell(aff, dic);
}

// 使用
const spell = await loadDictionary('en_US');

/**
 * Spell-check the current value of a form control.
 *
 * @param {{value: string}} inputElement Element whose `value` is checked.
 * @returns {Array<{word: string, suggestions: string[]}>} One entry per
 *   misspelled word of three or more characters, each with up to three
 *   suggestions.
 */
function checkInputElement(inputElement) {
  const tokens = inputElement.value.match(/\b[a-zA-Z'-]+\b/g) || [];
  const misspelled = tokens.filter(
    token => token.length >= 3 && !spell.correct(token)
  );

  const report = [];
  for (const token of misspelled) {
    report.push({ word: token, suggestions: spell.suggest(token).slice(0, 3) });
  }
  return report;
}

7.5 Go

7.5.1 gohunspell

go get github.com/kapsteur/gohunspell
// main.go — Go Hunspell 示例
package main

import (
	"fmt"
	"log"
	"strings"

	gohunspell "github.com/kapsteur/gohunspell"
)

// main demonstrates the gohunspell binding as used in this tutorial: it
// loads the en_US dictionary, spell-checks sample words, prints up to
// five suggestions per misspelling, and stems one word.
// NOTE(review): the gohunspell method names used here (NewHunspell,
// DeleteHunspell, Spell, Suggest, Stem) are taken as written — confirm
// against the installed package.
func main() {
	// 1. Create the Hunspell instance
	affPath := "/usr/share/hunspell/en_US.aff"
	dicPath := "/usr/share/hunspell/en_US.dic"

	hunspell, err := gohunspell.NewHunspell(affPath, dicPath)
	if err != nil {
		log.Fatalf("加载词典失败: %v", err)
	}
	// Release the handle when main returns
	defer hunspell.DeleteHunspell()

	// 2. Spell check
	testWords := []string{"hello", "helo", "world", "wrold", "programming"}
	for _, word := range testWords {
		correct := hunspell.Spell(word)
		status := "✓"
		if !correct {
			status = "✗"
		}
		fmt.Printf("  %s %s\n", status, word)
	}

	// 3. Suggestions (the slice is capped at five entries)
	fmt.Println("\n=== 建议 ===")
	misspelled := []string{"helo", "wrold", "progrmming"}
	for _, word := range misspelled {
		suggestions := hunspell.Suggest(word)
		fmt.Printf("  '%s' → %s\n", word, strings.Join(suggestions[:min(5, len(suggestions))], ", "))
	}

	// 4. Stemming
	fmt.Println("\n=== 词干 ===")
	stems := hunspell.Stem("running")
	fmt.Printf("  'running' → %s\n", strings.Join(stems, ", "))
}

// min returns the smaller of a and b (a when they are equal).
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}

7.5.2 Go HTTP 拼写检查服务

// server.go — Go HTTP 拼写检查 API
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"sync"

	gohunspell "github.com/kapsteur/gohunspell"
)

// SpellRequest is the JSON body decoded by HandleSpellcheck.
type SpellRequest struct {
	Text string `json:"text"` // text whose words are checked
	Lang string `json:"lang"` // key used to select a checker
}

// SpellError describes one misspelled word in a SpellResponse.
type SpellError struct {
	Word        string   `json:"word"`        // the word that failed the check
	Suggestions []string `json:"suggestions"` // replacement candidates for Word
}

// SpellResponse is the JSON document written by HandleSpellcheck.
type SpellResponse struct {
	Errors []SpellError `json:"errors"` // one entry per misspelled word
	Count  int          `json:"count"`  // set to len(Errors) by the handler
}

// SpellServer holds the loaded checkers, keyed by language code.
type SpellServer struct {
	checkers map[string]*gohunspell.Hunspell // filled by NewSpellServer
	mu       sync.RWMutex                    // read-locked around checkers lookups
}

// NewSpellServer loads one Hunspell checker per language.
//
// dicts maps a language code to its {aff path, dic path} pair. If any
// dictionary fails to load, the checkers created so far are released and
// the error is returned wrapped with the language code.
func NewSpellServer(dicts map[string][2]string) (*SpellServer, error) {
	s := &SpellServer{
		checkers: make(map[string]*gohunspell.Hunspell, len(dicts)),
	}
	for lang, paths := range dicts {
		h, err := gohunspell.NewHunspell(paths[0], paths[1])
		if err != nil {
			// Do not leak the handles that were already opened.
			for _, loaded := range s.checkers {
				loaded.DeleteHunspell()
			}
			return nil, fmt.Errorf("加载 %s 词典失败: %w", lang, err)
		}
		s.checkers[lang] = h
	}
	return s, nil
}

// HandleSpellcheck decodes a JSON SpellRequest from the request body,
// checks every distinct word of three or more bytes, and writes a JSON
// SpellResponse with up to five suggestions per misspelled word.
//
// A language without a loaded checker falls back to "en". The handler
// answers 400 for malformed JSON and 500 when no checker is available.
//
// NOTE(review): one checker is shared by all requests and net/http serves
// requests concurrently — confirm the binding is safe for concurrent use.
func (s *SpellServer) HandleSpellcheck(w http.ResponseWriter, r *http.Request) {
	var req SpellRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "无效请求", http.StatusBadRequest)
		return
	}

	// Resolve the checker, including the English fallback, under the lock.
	s.mu.RLock()
	checker, ok := s.checkers[req.Lang]
	if !ok {
		checker, ok = s.checkers["en"] // default to English
	}
	s.mu.RUnlock()

	// Without this guard a missing "en" entry would dereference nil below.
	if !ok || checker == nil {
		http.Error(w, "没有可用的词典", http.StatusInternalServerError)
		return
	}

	found := make([]SpellError, 0)
	seen := make(map[string]bool)

	for _, word := range extractWords(req.Text) {
		if len(word) < 3 || seen[word] {
			continue
		}
		seen[word] = true

		if checker.Spell(word) {
			continue
		}
		suggestions := checker.Suggest(word)
		if len(suggestions) > 5 {
			suggestions = suggestions[:5]
		}
		found = append(found, SpellError{
			Word:        word,
			Suggestions: suggestions,
		})
	}

	w.Header().Set("Content-Type", "application/json; charset=utf-8")
	if err := json.NewEncoder(w).Encode(SpellResponse{
		Errors: found,
		Count:  len(found),
	}); err != nil {
		log.Printf("写入响应失败: %v", err)
	}
}

// extractWords splits text into words made of ASCII letters with optional
// inner apostrophes and hyphens ("don't", "well-known"). Every other byte
// is a separator, and leading or trailing apostrophes/hyphens are dropped.
func extractWords(text string) []string {
	isLetter := func(c byte) bool {
		return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
	}

	var words []string
	start := -1 // start of the current run, or -1 when outside a run

	flush := func(end int) {
		if start < 0 {
			return
		}
		lo, hi := start, end
		start = -1
		for lo < hi && !isLetter(text[lo]) {
			lo++
		}
		for hi > lo && !isLetter(text[hi-1]) {
			hi--
		}
		if lo < hi {
			words = append(words, text[lo:hi])
		}
	}

	for i := 0; i < len(text); i++ {
		c := text[i]
		if isLetter(c) || c == '\'' || c == '-' {
			if start < 0 {
				start = i
			}
			continue
		}
		flush(i)
	}
	flush(len(text))
	return words
}

// main loads the en_US dictionary under the "en" key, registers
// HandleSpellcheck at /api/spellcheck and serves HTTP on :8080.
// Both a dictionary load failure and a listener error end the program
// through log.Fatal.
func main() {
	server, err := NewSpellServer(map[string][2]string{
		"en": {"/usr/share/hunspell/en_US.aff", "/usr/share/hunspell/en_US.dic"},
	})
	if err != nil {
		log.Fatal(err)
	}

	http.HandleFunc("/api/spellcheck", server.HandleSpellcheck)
	log.Println("拼写检查服务运行在 :8080")
	log.Fatal(http.ListenAndServe(":8080", nil))
}

7.6 Rust

7.6.1 hunspell-rs

# Cargo.toml
[dependencies]
hunspell-rs = "0.4"
// src/main.rs — Rust Hunspell 示例
use hunspell_rs::{Hunspell, HunspellInitCheckType};

/// Demo of the `hunspell_rs` binding as used in this tutorial: spell
/// checking, suggestions, morphological analysis, stemming and adding a
/// word at run time, with every result printed to stdout.
///
/// NOTE(review): this code uses the result of `check` as a `bool` and
/// calls `add` on a non-`mut` binding; confirm both against the
/// hunspell-rs version pinned in Cargo.toml.
fn main() {
    // 1. Create the Hunspell instance
    let aff_path = "/usr/share/hunspell/en_US.aff";
    let dic_path = "/usr/share/hunspell/en_US.dic";

    let hunspell = Hunspell::new(aff_path, dic_path);

    // 2. Spell check
    let test_words = vec!["hello", "helo", "world", "wrold"];
    println!("=== 拼写检查 ===");
    for word in &test_words {
        let correct = hunspell.check(word);
        println!("  {} {}", if correct { "✓" } else { "✗" }, word);
    }

    // 3. Suggestions (at most five are printed per word)
    println!("\n=== 建议 ===");
    let misspelled = vec!["helo", "wrold"];
    for word in &misspelled {
        let suggestions = hunspell.suggest(word);
        println!("  '{}' → {:?}", word, &suggestions[..suggestions.len().min(5)]);
    }

    // 4. Morphological analysis
    println!("\n=== 形态分析 ===");
    let analysis = hunspell.analyze("unhappiness");
    println!("  'unhappiness' → {:?}", analysis);

    // 5. Stemming
    let stems = hunspell.stem("running");
    println!("  'running' → {:?}", stems);

    // 6. Add a word, then check it again
    hunspell.add("Hunspell");
    println!("\n  添加后 'Hunspell': {}", hunspell.check("Hunspell"));
}

7.7 PHP

7.7.1 pspell 扩展

# 安装 pspell 扩展
sudo apt install php-pspell
# 或编译 PHP 时启用 --with-pspell
<?php
// spellcheck.php — PHP pspell example.
// NOTE: the pspell extension is backed by GNU Aspell, not Hunspell; it is
// shown here as PHP's built-in spell-checking interface.

// 1. Open the dictionary together with a personal word list.
//    pspell_add_to_personal() and pspell_save_wordlist() only work on a
//    dictionary opened this way (or through a config with a personal file).
$personal_path = "/tmp/personal_dict.txt";
$dict = pspell_new_personal(
    $personal_path, "en", "", "", "", PSPELL_FAST | PSPELL_RUN_TOGETHER
);
if (!$dict) {
    die("错误:无法加载词典\n");
}

// 2. Spell check
$test_words = ["hello", "helo", "world", "wrold", "programming"];
echo "=== 拼写检查 ===\n";
foreach ($test_words as $word) {
    $correct = pspell_check($dict, $word);
    echo sprintf("  %s %s\n", $correct ? "✓" : "✗", $word);
}

// 3. Suggestions (at most five per word)
echo "\n=== 建议 ===\n";
$misspelled = ["helo", "wrold", "progrmming"];
foreach ($misspelled as $word) {
    $suggestions = pspell_suggest($dict, $word);
    echo sprintf("  '%s' → %s\n", $word, implode(", ", array_slice($suggestions, 0, 5)));
}

// 4. Add words to the personal word list and write it to $personal_path.
//    pspell_save_wordlist() takes only the dictionary; the target file is
//    the one given to pspell_new_personal().
pspell_add_to_personal($dict, "API");
pspell_add_to_personal($dict, "JSON");
if (!pspell_save_wordlist($dict)) {
    fwrite(STDERR, "警告:无法保存个人词典 {$personal_path}\n");
}

echo "\n  添加后 'API': " . (pspell_check($dict, "API") ? "✓" : "✗") . "\n";

// 5. 文本检查函数
/**
 * Collect the misspelled words of a text.
 *
 * Every character that is not a word character, whitespace, apostrophe
 * or hyphen is removed, the rest is split on whitespace, and tokens
 * shorter than three bytes are skipped.
 *
 * @param string $text Text to check.
 * @param mixed  $dict Dictionary handle passed on to the pspell functions.
 * @return array List of ['word' => string, 'suggestions' => string[]]
 *               entries, with at most five suggestions each.
 */
function spellcheck_text(string $text, $dict): array {
    $cleaned = preg_replace('/[^\w\s\'-]/', '', $text);
    $tokens = preg_split('/\s+/', $cleaned);

    $report = [];
    foreach ($tokens as $token) {
        if (strlen($token) >= 3 && !pspell_check($dict, $token)) {
            $report[] = [
                'word' => $token,
                'suggestions' => array_slice(pspell_suggest($dict, $token), 0, 5),
            ];
        }
    }
    return $report;
}

// 使用
$text = "This documnet explians the basc usage of pspell.";
$results = spellcheck_text($text, $dict);
echo "\n=== 文本检查 ===\n";
foreach ($results as $r) {
    echo sprintf("  '%s' → 建议: %s\n", $r['word'], implode(', ', $r['suggestions']));
}
?>

7.8 多语言集成策略

7.8.1 统一接口设计

#!/usr/bin/env python3
"""多语言拼写检查器 — 统一接口"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional

@dataclass
class SpellResult:
    """Outcome of checking a single word against one language backend."""

    # The word that was checked.
    word: str
    # Result of the backend's check() call for the word.
    is_correct: bool
    # Backend suggestions; left empty by the manager when the word is correct.
    suggestions: list[str]
    # Language key the word was checked under.
    language: str

class SpellCheckerBackend(ABC):
    """Abstract base class every spell-checker backend must implement."""

    @abstractmethod
    def check(self, word: str) -> bool:
        """Return whether *word* is spelled correctly."""
        ...

    @abstractmethod
    def suggest(self, word: str, limit: int = 5) -> list[str]:
        """Return suggested replacements for *word*; *limit* defaults to 5."""
        ...

    @abstractmethod
    def add(self, word: str) -> None:
        """Register *word* with the backend."""
        ...

class HunspellBackend(SpellCheckerBackend):
    """Backend that shells out to the ``hunspell`` command-line tool.

    Words registered through :meth:`add` are kept in memory for the
    lifetime of the instance and are always accepted by :meth:`check`.
    """

    def __init__(self, dictionary: str, personal_dict: Optional[str] = None):
        # dictionary is passed to ``-d``; personal_dict (if set) to ``-p``.
        self.dictionary = dictionary
        self.personal_dict = personal_dict
        self._custom_words = set()

    def _run(self, mode_args: list[str], text: str) -> str:
        """Run hunspell with *mode_args*, send *text* on stdin, return stdout."""
        import subprocess
        cmd = ["hunspell", "-d", self.dictionary, *mode_args]
        # Applied to every invocation so check() and suggest() agree on
        # which words are known.
        if self.personal_dict:
            cmd.extend(["-p", self.personal_dict])
        result = subprocess.run(cmd, input=text, capture_output=True, text=True)
        return result.stdout

    def check(self, word: str) -> bool:
        """Return True when *word* was added locally or hunspell accepts it."""
        if word in self._custom_words:
            return True
        # -l lists only the misspelled tokens, so empty output means
        # "correct". A substring test would miss cases where hunspell
        # reports just a part of the input.
        return not self._run(["-l"], word).strip()

    def suggest(self, word: str, limit: int = 5) -> list[str]:
        """Return at most *limit* suggestions for *word* (empty if none)."""
        import re
        # hunspell has no "max suggestions" switch: -L means "print lines
        # with misspelled words", and a number after it would be taken as
        # an input file name. The limit is applied here instead.
        # The "^" prefix keeps pipe mode from reading the input as a command.
        output = self._run(["-a"], "^" + word)
        for line in output.strip().split("\n"):
            if line.startswith("&"):
                # "& <word> <count> <offset>: s1, s2, ..."
                match = re.match(r"& \S+ \d+ \d+: (.+)", line)
                if match:
                    found = [s.strip() for s in match.group(1).split(",")]
                    return found[:max(limit, 0)]
        return []

    def add(self, word: str) -> None:
        """Accept *word* in later :meth:`check` calls on this instance."""
        self._custom_words.add(word)

class MultiLanguageSpellChecker:
    """Registry that routes spell-check requests to per-language backends."""

    def __init__(self):
        self._backends: dict[str, SpellCheckerBackend] = {}

    def register(self, language: str, backend: SpellCheckerBackend):
        """Associate *backend* with *language*, replacing any earlier one."""
        self._backends[language] = backend

    def check(self, word: str, language: str) -> SpellResult:
        """Check *word* with the backend registered for *language*.

        Suggestions are requested only when the word is not correct.

        Raises:
            ValueError: If no backend is registered for *language*.
        """
        handler = self._backends.get(language)
        if not handler:
            raise ValueError(f"未注册语言: {language}")

        verdict = handler.check(word)
        if verdict:
            hints = []
        else:
            hints = handler.suggest(word)

        return SpellResult(
            word=word,
            is_correct=verdict,
            suggestions=hints,
            language=language
        )

    def check_multilingual(self, text: str, detect_lang=None) -> list[SpellResult]:
        """Check each word of a mixed-language text.

        Words are runs of three or more characters from the pattern below.
        *detect_lang*, when given, maps a word to its language key;
        otherwise every word is treated as "en". Words whose language has
        no registered backend are skipped.
        """
        import re
        pattern = r'\b[a-zA-Z\u00C0-\u024F\u0400-\u04FF]{3,}\b'

        collected = []
        for found in re.finditer(pattern, text):
            token = found.group()
            language = "en" if not detect_lang else detect_lang(token)
            if language not in self._backends:
                continue
            collected.append(self.check(token, language))
        return collected

# 使用示例
checker = MultiLanguageSpellChecker()
checker.register("en", HunspellBackend("en_US"))
checker.register("fr", HunspellBackend("fr"))
checker.register("de", HunspellBackend("de_DE"))

# 单语言检查
result = checker.check("helo", "en")
print(f"'{result.word}' → {'✓' if result.is_correct else '✗'} {result.suggestions}")

# 多语言检查
text = "This English text avec du Français und Deutsch mischung"
results = checker.check_multilingual(text, detect_lang=lambda w: "en")  # 简化:假设英语

7.9 本章小结

| 语言 | 推荐方案 | 安装命令 | 适用场景 |
| --- | --- | --- | --- |
| C/C++ | libhunspell | apt install libhunspell-dev | 底层集成 |
| Python | pyspellchecker | pip install pyspellchecker | 快速开发 |
| Node.js | nspell | npm install nspell | Web 应用 |
| Go | gohunspell | go get github.com/kapsteur/gohunspell | 微服务 |
| Rust | hunspell-rs | cargo add hunspell-rs | 高性能服务 |
| PHP | pspell | apt install php-pspell | Web 后端 |

扩展阅读