博客使用whoosh+jieba作搜索
Posted 74 months ago jieba search whoosh 搜索 分词
博客一直没有搜索功能。本来想用 ES,但是想用更硬核一点的方案,所以选用了 whoosh。whoosh 是纯 Python 编写的。
上代码
单例模式初始化一次whoosh
更新后注销掉对象
唯一难受的是:博客的内存占用从 30M 上下飙升到 226M!!!!!!!!!!
网上的 whoosh 教程都是贼简单的 demo
import os
from jieba.analyse import ChineseAnalyzer
from whoosh.qparser import MultifieldParser
from services import Singleton
from logic.articleDao import articleDao
from whoosh.index import create_in
from whoosh.fields import Schema,ID,TEXT
# Shared jieba analyzer instance; it is passed as the `analyzer` of the
# Whoosh schema fields declared in Search.__init__.
ana=ChineseAnalyzer()
class Search(metaclass=Singleton):
    """Full-text search over blog articles, backed by a Whoosh index.

    Constructing the object (re)creates the on-disk index in ``./index`` from
    every article returned by ``articleDao.listAllNoPage()``. The class uses
    the project's ``Singleton`` metaclass (defined elsewhere; presumably it
    caches one instance per class -- confirm), so the rebuild is meant to
    happen once until :meth:`clear` is called.

    Attributes:
        list: The articles that were indexed, as returned by the DAO.
        ix: The Whoosh index object used for searching.
    """

    # Directory that holds the Whoosh index files.
    _INDEX_DIR = "index"

    # Fields consulted when parsing a user query; ``id`` is stored so it can
    # be returned, but it is not searched.
    _SEARCH_FIELDS = ["content", "title", "desc", "keyword"]

    def __init__(self):
        """Load all articles and build a fresh index from them."""
        # Kept as a public attribute for backward compatibility with callers
        # that may read it; search() no longer depends on it.
        self.list = articleDao.listAllNoPage()

        schema = Schema(
            # An ID is an atomic value: index it as a single term instead of
            # running it through the Chinese tokenizer/stemmer.
            id=ID(stored=True),
            title=TEXT(stored=True, analyzer=ana),
            content=TEXT(stored=True, analyzer=ana),
            keyword=TEXT(stored=True, analyzer=ana),
            desc=TEXT(stored=True, analyzer=ana),
        )

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self._INDEX_DIR, exist_ok=True)
        ix = create_in(self._INDEX_DIR, schema)

        # The writer context manager commits on success and cancels (thereby
        # releasing the index write lock) if adding a document raises.
        with ix.writer() as writer:
            for art in self.list:
                writer.add_document(
                    id=str(art.id),
                    title=art.title,
                    content=art.content,
                    keyword=art.keyword,
                    desc=art.desc,
                )
        self.ix = ix

    def search(self, keyword):
        """Return the stored ``id`` of every article matching *keyword*.

        Args:
            keyword: Query string; it is parsed with Whoosh's
                ``MultifieldParser`` against the content, title, desc and
                keyword fields.

        Returns:
            A list of article ids (as the strings stored in the index), in
            the order Whoosh ranks the hits.
        """
        parser = MultifieldParser(self._SEARCH_FIELDS, schema=self.ix.schema)
        query = parser.parse(keyword)
        # ``with`` guarantees the searcher is closed even if the search fails.
        with self.ix.searcher() as searcher:
            # limit=None returns all hits. The previous limit=len(self.list)
            # became limit=0 for an empty blog, which Whoosh rejects.
            hits = searcher.search(query, limit=None)
            # Materialize the ids before the searcher is closed.
            return [hit.get("id") for hit in hits]

    @classmethod
    def clear(cls):
        """Reset the cached singleton so the next ``Search()`` rebuilds the index.

        Intended to be called after articles change. This replaces
        ``cls._instances`` with an empty dict; how that attribute is consulted
        is up to the ``Singleton`` metaclass (not visible here).
        """
        cls._instances = {}