Aspell 拼写检查完全教程 / 第6章 编程接口
第 6 章:编程接口
本章介绍如何通过编程方式调用 Aspell——包括 C API、Python 绑定,以及其他语言的集成方法。
6.1 编程接口总览
| 语言 / 方式 | 库 / 绑定 | 说明 |
|---|---|---|
| C / C++ | libaspell | 官方共享库,所有绑定的基础 |
| Python | aspell-python | 第三方 Python 绑定(C 扩展模块,PyPI 包名为 aspell-python-py3) |
| Perl | Text::Aspell | Perl CPAN 模块 |
| Ruby | raspell | Ruby gem |
| 管道协议 | aspell -a | 跨语言通用方案(推荐) |
推荐:对于大多数场景,建议使用管道协议(`aspell -a`)集成——它跨语言、跨平台、无需编译依赖,且性能足够。
6.2 C API
6.2.1 API 头文件
// 头文件位置
#include <aspell.h>
6.2.2 核心数据结构
// Spell-checker instance
AspellSpeller *speller;
// Configuration object
AspellConfig *config;
// Result object that may carry an error (e.g. what new_aspell_speller returns)
AspellCanHaveError *possible_err;
// Suggestion word list and the enumeration used to iterate over it
AspellWordList *suggestions;
AspellStringEnumeration *elements;
6.2.3 完整 C 示例
/* spell_check.c — Aspell C API 完整示例 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <aspell.h>
/*
 * Check a single word and print the verdict.  For a misspelled word the
 * suggestions are printed as a comma-separated list.
 *
 * Returns the raw value of aspell_speller_check():
 *    1 = spelled correctly
 *    0 = misspelled
 *   -1 = error (the message is written to stderr)
 */
int check_word(AspellSpeller *speller, const char *word) {
    int len = (int)strlen(word);
    int result = aspell_speller_check(speller, word, len);

    if (result == 1) {
        printf("✓ 正确: %s\n", word);
        return result;
    }
    if (result != 0) {
        fprintf(stderr, "错误: %s\n",
                aspell_speller_error_message(speller));
        return result;
    }

    printf("✗ 错误: %s\n", word);

    /* The returned list is owned by the speller; only the enumeration
     * created from it has to be deleted. */
    const AspellWordList *suggestions =
        aspell_speller_suggest(speller, word, len);
    if (suggestions == NULL) {
        /* Guard against a failed lookup instead of dereferencing NULL. */
        fprintf(stderr, "错误: %s\n",
                aspell_speller_error_message(speller));
        return result;
    }

    AspellStringEnumeration *elements =
        aspell_word_list_elements(suggestions);
    const char *suggestion;
    const char *separator = "";

    printf(" 建议: ");
    while ((suggestion = aspell_string_enumeration_next(elements)) != NULL) {
        printf("%s%s", separator, suggestion);
        separator = ", ";
    }
    printf("\n");
    delete_aspell_string_enumeration(elements);

    return result;
}
/*
 * Add a word to the speller's personal word list.
 * Success is reported only when the speller's error state is clear.
 */
void add_to_personal(AspellSpeller *speller, const char *word) {
    aspell_speller_add_to_personal(speller, word, (int)strlen(word));
    if (aspell_speller_error(speller) != 0) {
        fprintf(stderr, "添加到个人词典失败: %s\n",
                aspell_speller_error_message(speller));
        return;
    }
    printf("已添加到个人词典: %s\n", word);
}
/*
 * Write all modified word lists (including the personal dictionary) to disk.
 *
 * aspell_speller_save_all_word_lists() returns an int, not an
 * AspellCanHaveError pointer, so the outcome is read from the speller's
 * own error state.
 */
void save_personal(AspellSpeller *speller) {
    aspell_speller_save_all_word_lists(speller);
    if (aspell_speller_error(speller) != 0) {
        fprintf(stderr, "保存词典失败: %s\n",
                aspell_speller_error_message(speller));
    } else {
        printf("个人词典已保存\n");
    }
}
/*
 * Demo entry point: build an en_US speller, check a few words, then add one
 * word to the personal dictionary and save it.
 * Returns 0 on success, 1 when the speller cannot be created.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    /* Build the configuration. */
    AspellConfig *config = new_aspell_config();
    aspell_config_replace(config, "lang", "en_US");
    aspell_config_replace(config, "sug-mode", "ultra");

    /* Create the speller.  It keeps its own copy of the configuration, so
     * the config object is released right away; this also avoids leaking it
     * on the error path below. */
    AspellCanHaveError *possible_err = new_aspell_speller(config);
    delete_aspell_config(config);

    if (aspell_error_number(possible_err) != 0) {
        fprintf(stderr, "初始化失败: %s\n",
                aspell_error_message(possible_err));
        delete_aspell_can_have_error(possible_err);
        return 1;
    }
    AspellSpeller *speller = to_aspell_speller(possible_err);

    /* Check a fixed list of sample words. */
    const char *words[] = {"hello", "teh", "world", "aspell", "programing"};
    int num_words = (int)(sizeof(words) / sizeof(words[0]));
    for (int i = 0; i < num_words; i++) {
        check_word(speller, words[i]);
    }

    /* Add a new word and persist the personal dictionary. */
    add_to_personal(speller, "aspell");
    save_personal(speller);

    /* Clean up. */
    delete_aspell_speller(speller);
    return 0;
}
6.2.4 编译与运行
# 编译(需要 libaspell-dev)
gcc -o spell_check spell_check.c $(pkg-config --cflags --libs aspell)
# 运行
./spell_check
# 输出:
# ✓ 正确: hello
# ✗ 错误: teh
# 建议: the, tea, tee
# ✓ 正确: world
# ✓ 正确: aspell
# ✗ 错误: programing
# 建议: programming, program ring, program-ring, program
# 已添加到个人词典: aspell
# 个人词典已保存
6.2.5 C API 参考
| 函数 | 说明 |
|---|---|
| `new_aspell_config()` | 创建新配置对象 |
| `aspell_config_replace(config, key, value)` | 设置配置项 |
| `new_aspell_speller(config)` | 创建拼写检查器实例 |
| `aspell_speller_check(speller, word, len)` | 检查单词 |
| `aspell_speller_suggest(speller, word, len)` | 获取建议列表 |
| `aspell_speller_add_to_personal(speller, word, len)` | 添加到个人词典 |
| `aspell_speller_add_to_session(speller, word, len)` | 添加到会话(临时) |
| `aspell_speller_save_all_word_lists(speller)` | 保存个人词典 |
| `aspell_speller_error_message(speller)` | 获取错误信息 |
| `delete_aspell_speller(speller)` | 销毁实例 |
| `delete_aspell_config(config)` | 销毁配置 |
6.3 Python 绑定
6.3.1 安装 aspell-python
# 使用 pip 安装
pip install aspell-python-py3
# 或者使用系统包管理器
sudo apt-get install python3-aspell
6.3.2 基本使用
#!/usr/bin/env python3
"""aspell_basic.py — Aspell Python 绑定基本示例"""
import aspell
# Create a Speller for English.  aspell-python takes configuration as
# (key, value) pairs, so the language is passed as the 'lang' option rather
# than as a lone 'en' argument.
s = aspell.Speller('lang', 'en')

# Check one word; on a miss, show at most five suggestions.
word = "teh"
if s.check(word):
    print(f"✓ 正确: {word}")
else:
    print(f"✗ 错误: {word}")
    suggestions = s.suggest(word)
    print(f" 建议: {', '.join(suggestions[:5])}")
# Output:
# ✗ 错误: teh
# 建议: the, tea, tee
6.3.3 批量检查
#!/usr/bin/env python3
"""aspell_batch.py — 批量检查文本中的拼写错误"""
import aspell
from typing import Dict, List
def check_text(text: str, lang: str = 'en') -> Dict[str, List[str]]:
    """Find misspelled words in *text* and collect suggestions for each.

    Args:
        text: The text to check.
        lang: Aspell language code, passed as the ``lang`` option.

    Returns:
        A dict mapping each distinct misspelled word to at most five
        suggestions, in order of first appearance.
    """
    import re

    # aspell-python expects (key, value) configuration pairs.
    s = aspell.Speller('lang', lang)
    errors: Dict[str, List[str]] = {}

    # Naive tokenisation: runs of ASCII letters only.
    for word in re.findall(r'\b[a-zA-Z]+\b', text):
        if word in errors:
            continue  # already reported; skip the repeated lookup
        if not s.check(word):
            errors[word] = s.suggest(word)[:5]
    return errors
def main():
    """Spell-check a sample paragraph and print each error with its suggestions."""
    sample = """
Aspell is a poweful spell checker for Unix systems.
It was designed as a replacment for ispell.
The program uses phonetc algorithms.
"""
    found = check_text(sample)
    if not found:
        print("✓ 拼写检查通过")
        return

    print(f"发现 {len(found)} 个拼写错误:")
    for misspelled in found:
        print(f" ✗ {misspelled}")
        print(f" 建议: {', '.join(found[misspelled])}")


if __name__ == '__main__':
    main()
# 输出:
# 发现 3 个拼写错误:
# ✗ poweful
# 建议: powerful, poetically, pooful
# ✗ replacment
# 建议: replacement, replacemen, replaceable
# ✗ phonetc
# 建议: phoned, phones, photonic, Phoenicia, honk
6.3.4 自定义词典
#!/usr/bin/env python3
"""aspell_custom_dict.py — 使用自定义词典"""
import aspell
def check_with_custom_dict(text: str, dict_path: str) -> None:
    """Check *text* using the personal word list stored at *dict_path*.

    Prints one line per misspelled word together with up to three
    suggestions.

    Args:
        text: The text to check.
        dict_path: Path to an Aspell personal word list (``.pws`` file).
    """
    import os
    import re

    # The word list is selected through the 'personal' option.  Calling
    # add_to_personal(dict_path) would only add the path string as a word.
    # The path is made absolute on the assumption that Aspell resolves
    # relative names against its home-dir rather than the CWD -- verify.
    s = aspell.Speller(
        ('lang', 'en'),
        ('personal', os.path.abspath(dict_path)),
    )

    for word in re.findall(r'\b[a-zA-Z]+\b', text):
        if not s.check(word):
            suggestions = s.suggest(word)[:3]
            print(f"✗ {word} → {', '.join(suggestions)}")


# Check a sentence against the project dictionary.
check_with_custom_dict(
    "Aspell checks Dockerfiles and Kubernetes configs",
    dict_path="./project.pws"
)
6.3.5 Flask Web 应用示例
#!/usr/bin/env python3
"""spell_server.py — Flask 拼写检查 Web 服务"""
from flask import Flask, request, jsonify
import aspell
import re
app = Flask(__name__)
# Module-level Speller shared by every request handler.
# NOTE(review): the instance is used from whatever threads the server runs;
# confirm thread-safety or serialise access before using a threaded server.
# aspell-python expects (key, value) configuration pairs.
speller = aspell.Speller('lang', 'en')
@app.route('/check', methods=['POST'])
def check_spelling():
    """Spell-check the ``text`` field of a JSON request body.

    Returns:
        JSON with ``errors`` (misspelled word -> up to five suggestions),
        ``total_words`` and ``error_count``; or a 400 response when the body
        is not a JSON object or ``text`` is not a string.
    """
    # silent=True yields None for a missing or invalid body, so a bad request
    # gets a 400 here instead of an AttributeError (HTTP 500) on data.get.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    text = data.get('text', '')
    if not isinstance(text, str):
        return jsonify({'error': '"text" must be a string'}), 400

    words = re.findall(r'\b[a-zA-Z]+\b', text)
    errors = {}
    for word in words:
        if word not in errors and not speller.check(word):
            errors[word] = speller.suggest(word)[:5]

    return jsonify({
        'errors': errors,
        'total_words': len(words),
        'error_count': len(errors)
    })
@app.route('/suggest/<word>', methods=['GET'])
def get_suggestions(word: str):
    """Return whether *word* is spelled correctly plus up to ten suggestions."""
    candidates = speller.suggest(word)
    is_correct = speller.check(word)
    payload = {
        'word': word,
        'correct': is_correct,
        'suggestions': candidates[:10],
    }
    return jsonify(payload)
# Run Flask's built-in server on port 5000 with debug mode enabled, but only
# when this file is executed as a script.
if __name__ == '__main__':
    app.run(debug=True, port=5000)
# 启动服务
python spell_server.py
# 测试检查接口
curl -X POST http://localhost:5000/check \
-H "Content-Type: application/json" \
-d '{"text": "Aspell is a poweful spell checker"}'
# 输出:
# {
# "errors": {"poweful": ["powerful", "poetically", ...]},
# "total_words": 6,
# "error_count": 1
# }
6.3.6 Python API 参考
| 方法 | 说明 |
|---|---|
| `aspell.Speller('lang', lang)` | 创建 Speller 实例(参数为配置键值对) |
| `speller.check(word)` | 检查单词(返回 bool) |
| `speller.suggest(word)` | 获取建议列表(返回 list) |
| `speller.addtoPersonal(word)` | 添加到个人词典 |
| `speller.saveAllwords()` | 保存个人词典 |
6.4 管道协议集成(推荐方案)
管道协议是跨语言集成的推荐方案,无需编译依赖。
6.4.1 Python 管道集成
#!/usr/bin/env python3
"""aspell_pipe.py — 通过管道协议调用 Aspell(推荐方案)"""
import subprocess
from typing import Dict, List, Tuple
class AspellChecker:
    """Spell checker that talks to a long-lived ``aspell -a`` child process.

    In pipe mode Aspell answers every input line with one result line per
    word, followed by a blank line.  Each request therefore reads up to that
    blank line so that consecutive calls stay in sync.
    """

    # Result-line markers meaning "word accepted" (exact match, affix root,
    # compound).
    _OK_MARKERS = ('*', '+', '-')

    def __init__(self, lang: str = 'en', mode: str = None):
        """Start ``aspell -a`` for dictionary *lang*, optionally with a filter *mode*."""
        cmd = ['aspell', '-a', '-d', lang]
        if mode:
            cmd.extend(['--mode', mode])
        self.process = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
        # Consume the version banner printed at startup.
        self.process.stdout.readline()

    def _read_response(self) -> List[str]:
        """Read result lines up to the blank line that terminates a response."""
        lines = []
        while True:
            raw = self.process.stdout.readline()
            line = raw.strip()
            if not raw or not line:  # EOF or end-of-response marker
                return lines
            lines.append(line)

    @classmethod
    def _parse_response(cls, lines: List[str]) -> Tuple[bool, List[str]]:
        """Turn the result lines of one request into ``(is_correct, suggestions)``."""
        for line in lines:
            marker = line[:1]
            if marker in ('&', '?'):
                # "& word count offset: sug1, sug2, ..."
                _, _, tail = line.partition(':')
                return False, [s.strip() for s in tail.split(',') if s.strip()]
            if marker not in cls._OK_MARKERS:
                # '#' (no suggestions) or anything unexpected
                return False, []
        # No result lines at all is treated as "not confirmed correct".
        return bool(lines), []

    def check_word(self, word: str) -> Tuple[bool, List[str]]:
        """Check one word and return ``(is_correct, suggestions)``."""
        # The leading '^' tells Aspell to spell-check the rest of the line
        # even if the word starts with a pipe-mode command character.
        self.process.stdin.write(f'^{word}\n')
        self.process.stdin.flush()
        return self._parse_response(self._read_response())

    def check_text(self, text: str) -> Dict[str, List[str]]:
        """Check whitespace-separated words; return ``{misspelled: [suggestions]}``."""
        errors = {}
        for word in text.split():
            # Strip surrounding punctuation; skip anything not purely alphabetic.
            clean_word = word.strip('.,!?;:"\'()[]{}')
            if not clean_word or not clean_word.isalpha():
                continue
            if clean_word in errors:
                continue  # already reported
            is_correct, suggestions = self.check_word(clean_word)
            if not is_correct:
                errors[clean_word] = suggestions
        return errors

    def close(self):
        """Close the pipes and wait for the Aspell process to exit."""
        self.process.stdin.close()
        self.process.wait()
        for stream in (self.process.stdout, self.process.stderr):
            if stream is not None:
                stream.close()
# Usage example
if __name__ == '__main__':
    checker = AspellChecker('en')
    try:
        text = "Aspell is a poweful spell checker for teh Unix system"
        errors = checker.check_text(text)
        print(f"发现 {len(errors)} 个拼写错误:")
        for word, suggestions in errors.items():
            print(f" ✗ {word} → {', '.join(suggestions[:3])}")
    finally:
        # Always reap the child process, even if checking raised.
        checker.close()
# 输出:
# 发现 2 个拼写错误:
# ✗ poweful → powerful, poetically, pooful
# ✗ teh → the, tea, tee
6.4.2 Go 管道集成
// aspell.go — Go 语言管道集成
package aspell
import (
"bufio"
"fmt"
"io"
"os/exec"
"strings"
)
// Checker wraps a running `aspell -a` child process.
type Checker struct {
	cmd    *exec.Cmd      // the aspell child process
	stdin  io.WriteCloser // pipe used to send words to aspell
	stdout *bufio.Scanner // line scanner over aspell's standard output
}
// NewChecker starts `aspell -a -d lang` and returns a Checker connected to
// it. An error is returned when the pipes cannot be created, the process
// cannot be started, or aspell does not print its "@..." greeting line
// (for example because the dictionary is missing).
func NewChecker(lang string) (*Checker, error) {
	cmd := exec.Command("aspell", "-a", "-d", lang)
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return nil, err
	}
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err := cmd.Start(); err != nil {
		return nil, err
	}

	scanner := bufio.NewScanner(stdout)
	// The first line must be the version banner; anything else means
	// startup failed, so reap the child instead of returning a dead checker.
	if !scanner.Scan() || !strings.HasPrefix(scanner.Text(), "@") {
		stdin.Close()
		cmd.Wait()
		return nil, fmt.Errorf("aspell: no greeting received for dictionary %q", lang)
	}
	return &Checker{cmd: cmd, stdin: stdin, stdout: scanner}, nil
}
// Check sends one word to aspell and reports whether it is spelled
// correctly, together with any suggestions for a misspelling.
//
// In pipe mode aspell answers each input line with one result line per word
// followed by a blank line, so the whole response is consumed up to that
// blank line; otherwise the next call would read the leftover separator.
func (c *Checker) Check(word string) (bool, []string) {
	// The leading '^' makes aspell spell-check the rest of the line even if
	// the word starts with a pipe-mode command character.
	if _, err := fmt.Fprintf(c.stdin, "^%s\n", word); err != nil {
		return false, nil
	}

	sawResult := false
	misspelled := false
	var suggestions []string

	for c.stdout.Scan() {
		line := c.stdout.Text()
		if line == "" { // end of this response
			break
		}
		sawResult = true

		switch {
		case strings.HasPrefix(line, "*"),
			strings.HasPrefix(line, "+"),
			strings.HasPrefix(line, "-"):
			// word accepted
		case strings.HasPrefix(line, "&"), strings.HasPrefix(line, "?"):
			// "& word count offset: sug1, sug2, ..."
			if !misspelled {
				parts := strings.SplitN(line, ":", 2)
				if len(parts) > 1 {
					for _, s := range strings.Split(parts[1], ",") {
						if s = strings.TrimSpace(s); s != "" {
							suggestions = append(suggestions, s)
						}
					}
				}
			}
			misspelled = true
		default:
			// "#" (no suggestions) or anything unexpected
			misspelled = true
		}
	}

	if !sawResult || misspelled {
		return false, suggestions
	}
	return true, nil
}
// Close closes the pipe to aspell's standard input and waits for the child
// process to exit. Errors from both calls are discarded.
func (c *Checker) Close() {
	c.stdin.Close()
	c.cmd.Wait()
}
6.4.3 Node.js 管道集成
// aspell.mjs — Node.js 管道集成
import { spawn } from 'child_process';
import { createInterface } from 'readline';
/**
 * Spell checker backed by a long-lived `aspell -a` child process.
 *
 * In pipe mode aspell prints a banner line starting with "@" at startup and
 * answers every input line with one result line per word followed by a blank
 * line. Result lines are buffered until that blank line and then handed to
 * the oldest pending request, so neither the banner nor the separators are
 * ever mistaken for a result.
 */
class AspellChecker {
  constructor(lang = 'en') {
    this.process = spawn('aspell', ['-a', '-d', lang]);
    this.readline = createInterface({ input: this.process.stdout });
    this.pending = [];       // resolvers waiting for a response, FIFO
    this.responseLines = []; // result lines of the response being read
    this.readline.on('line', (line) => this.handleLine(line));
  }

  /** Route one line of aspell output. */
  handleLine(line) {
    if (line.startsWith('@')) {
      return; // version banner
    }
    if (line !== '') {
      this.responseLines.push(line);
      return;
    }
    // A blank line ends the current response.
    const lines = this.responseLines;
    this.responseLines = [];
    const deliver = this.pending.shift();
    if (deliver) {
      deliver(lines);
    }
  }

  /**
   * Check one word.
   * @param {string} word
   * @returns {Promise<{correct: boolean, suggestions: string[]}>}
   */
  async check(word) {
    return new Promise((resolve) => {
      this.pending.push((lines) => {
        const accepted =
          lines.length > 0 && lines.every((l) => /^[*+-]/.test(l));
        if (accepted) {
          resolve({ correct: true, suggestions: [] });
          return;
        }
        // "& word count offset: sug1, sug2, ..."
        const hit = lines.find((l) => /^[&?]/.test(l) && l.includes(':'));
        const suggestions = hit
          ? hit
              .slice(hit.indexOf(':') + 1)
              .split(',')
              .map((s) => s.trim())
              .filter((s) => s !== '')
          : [];
        resolve({ correct: false, suggestions });
      });
      // The leading '^' makes aspell check the rest of the line even if the
      // word starts with a pipe-mode command character.
      this.process.stdin.write(`^${word}\n`);
    });
  }

  /** Stop reading and close aspell's standard input so the process exits. */
  close() {
    this.readline.close();
    this.process.stdin.end();
  }
}
// Usage example: check a few words one after another and print the verdicts.
const checker = new AspellChecker('en');
const words = ['hello', 'teh', 'world', 'programing'];
for (const word of words) {
  const { correct, suggestions } = await checker.check(word);
  const report = correct
    ? `✓ ${word}`
    : `✗ ${word} → ${suggestions.slice(0, 3).join(', ')}`;
  console.log(report);
}
checker.close();
# 运行
node aspell.mjs
# 输出:
# ✓ hello
# ✗ teh → the, tea, tee
# ✓ world
# ✗ programing → programming, program ring, program
6.5 Perl 绑定(Text::Aspell)
#!/usr/bin/perl
# aspell.pl — Perl 绑定示例
use strict;
use warnings;
use Text::Aspell;
my $speller = Text::Aspell->new;
$speller->set_option('lang', 'en_US');

# Check each word; for a misspelling print at most three suggestions.
my @words = qw/hello teh world programing/;
foreach my $word (@words) {
    if ($speller->check($word)) {
        print "✓ $word\n";
        next;
    }
    my @suggestions = $speller->suggest($word);
    # Truncate rather than slice [0..2]: slicing a shorter list yields undef
    # elements, which warn under "use warnings" and print empty entries.
    splice(@suggestions, 3) if @suggestions > 3;
    print "✗ $word → " . join(', ', @suggestions) . "\n";
}
# 安装
cpan Text::Aspell
# 或
sudo apt-get install libtext-aspell-perl
6.6 多语言集成最佳实践
6.6.1 进程池管理
对于高并发场景,管理多个 Aspell 进程:
#!/usr/bin/env python3
"""aspell_pool.py — Aspell 进程池管理"""
import subprocess
from queue import Queue
from threading import Thread, Lock
class AspellPool:
    """Fixed-size pool of ``aspell -a`` processes for concurrent checking.

    Each request borrows one process from a queue, so a process is used by
    only one caller at a time, and returns it afterwards.
    """

    # Result-line markers meaning "word accepted".
    _OK_MARKERS = ('*', '+', '-')

    def __init__(self, pool_size: int = 4, lang: str = 'en'):
        """Start *pool_size* Aspell processes for dictionary *lang*."""
        self.pool = Queue()
        self.lock = Lock()  # not used by the pool itself; kept for callers
        for _ in range(pool_size):
            proc = subprocess.Popen(
                ['aspell', '-a', '-d', lang],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            proc.stdout.readline()  # consume the version banner
            self.pool.put(proc)

    @classmethod
    def _parse_response(cls, lines):
        """Turn the result lines of one request into ``(is_correct, suggestions)``."""
        for line in lines:
            marker = line[:1]
            if marker in ('&', '?'):
                # "& word count offset: sug1, sug2, ..."
                _, _, tail = line.partition(':')
                return False, [s.strip() for s in tail.split(',') if s.strip()]
            if marker not in cls._OK_MARKERS:
                return False, []
        # No result lines at all is treated as "not confirmed correct".
        return bool(lines), []

    def check_word(self, word: str):
        """Check *word* on a borrowed process; return ``(is_correct, suggestions)``."""
        proc = self.pool.get()
        try:
            # '^' makes Aspell check the rest of the line even if the word
            # starts with a pipe-mode command character.
            proc.stdin.write(f'^{word}\n')
            proc.stdin.flush()

            # Aspell ends every response with a blank line.  Read up to it so
            # the next borrower does not start on a leftover separator.
            lines = []
            while True:
                raw = proc.stdout.readline()
                line = raw.strip()
                if not raw or not line:
                    break
                lines.append(line)
            return self._parse_response(lines)
        finally:
            self.pool.put(proc)

    def close(self):
        """Shut down every process currently sitting in the pool."""
        while not self.pool.empty():
            proc = self.pool.get()
            proc.stdin.close()
            proc.wait()
# Usage example
pool = AspellPool(pool_size=4)
# Check several words concurrently
import concurrent.futures
words = ['hello', 'teh', 'world', 'programing', 'aspell', 'python']
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = {}
    for w in words:
        futures[executor.submit(pool.check_word, w)] = w
    for finished in concurrent.futures.as_completed(futures):
        word = futures[finished]
        is_correct, suggestions = finished.result()
        report = f"✓ {word}" if is_correct else f"✗ {word} → {', '.join(suggestions[:3])}"
        print(report)
pool.close()
6.6.2 错误处理
#!/usr/bin/env python3
"""aspell_error_handling.py — 健壮的 Aspell 集成"""
import subprocess
import shutil
class AspellError(Exception):
    """Base class for all Aspell integration errors."""


class AspellNotAvailable(AspellError):
    """Aspell is not installed or cannot be started."""


class AspellDictionaryError(AspellError):
    """Aspell reported a dictionary-related problem."""
def ensure_aspell():
    """Raise AspellNotAvailable unless an ``aspell`` executable is on PATH."""
    located = shutil.which('aspell')
    if located:
        return
    message = "aspell 未找到。请安装: " + "sudo apt-get install aspell aspell-en"
    raise AspellNotAvailable(message)
def create_checker(lang='en', mode=None, personal=None):
    """Start an ``aspell -a`` process and verify that it came up.

    Args:
        lang: Dictionary name passed to ``-d``.
        mode: Optional filter mode passed to ``--mode``.
        personal: Optional personal word list passed to ``--personal``.

    Returns:
        The running ``subprocess.Popen`` object, with the greeting line
        already consumed.

    Raises:
        AspellNotAvailable: ``aspell`` is not installed or cannot be executed.
        AspellDictionaryError: Aspell started but did not print its greeting.
    """
    ensure_aspell()
    cmd = ['aspell', '-a', '-d', lang]
    if mode:
        cmd.extend(['--mode', mode])
    if personal:
        cmd.extend(['--personal', personal])

    # Only the spawn itself can raise FileNotFoundError, so keep the try small.
    try:
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True
        )
    except FileNotFoundError as exc:
        raise AspellNotAvailable("无法启动 aspell 进程") from exc

    # A healthy Aspell prints a banner starting with '@' before any input.
    greeting = proc.stdout.readline()
    if greeting.startswith('@'):
        return proc

    # Startup failed.  Stop the child first so draining stderr cannot block
    # on a process that is still alive; communicate() also reaps it and
    # closes the pipes.
    proc.kill()
    _, error = proc.communicate()
    raise AspellDictionaryError(
        f"Aspell 启动失败 (语言: {lang}): {error}"
    )
# Usage example
proc = None
try:
    proc = create_checker(lang='en_US', personal='./project.pws')
    proc.stdin.write('teh\n')
    proc.stdin.flush()
    print(proc.stdout.readline())
except AspellNotAvailable as e:
    print(f"安装错误: {e}")
except AspellDictionaryError as e:
    print(f"词典错误: {e}")
finally:
    # Close stdin, drain the remaining output and reap the child process.
    if proc is not None:
        proc.communicate()
6.7 性能对比
| 集成方式 | 启动时间 | 单词检查延迟 | 内存占用 | 依赖 |
|---|---|---|---|---|
| C API (libaspell) | 快 | 最低 | 中 | libaspell-dev |
| Python 绑定 | 中 | 低 | 中 | aspell-python-py3 |
| 管道协议 | 中 | 中 | 低 | 仅 aspell 二进制 |
| Shell 调用 | 慢 | 高 | 低 | 仅 aspell 二进制 |
建议:
- 性能关键场景:使用 C API
- 一般应用:使用管道协议(推荐)
- 简单脚本:直接调用 `aspell list`
6.8 本章小结
| 要点 | 说明 |
|---|---|
| C API | libaspell,性能最佳,适合系统级集成 |
| Python 绑定 | aspell-python-py3,方便但需安装依赖 |
| 管道协议 | 推荐方案,跨语言、无依赖、易维护 |
| 进程池 | 高并发场景需管理多个 Aspell 进程 |
| 错误处理 | 始终检查 aspell 是否可用、词典是否存在 |
下一步
→ 第 7 章:创建自定义词典 — 学习从零创建自定义词典,包括 affix 文件和词表格式。