IKAnalyzerDemo 全文检索及分词实例


/**
 * IK Analyzer demo: builds a Lucene index from database titles and then
 * searches it using IK word segmentation.
 */
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.IKSegmentation;
import org.wltea.analyzer.Lexeme;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

import cn.jh.db.jdbc.DBO;
import cn.jh.db.jdbc.DBOManager;

/**
 * Demonstrates full-text indexing and searching with Lucene and the IK analyzer.
 *
 * <p>{@link #test()} builds an on-disk index under {@link #INDEX_DIR} from titles
 * read through a {@code DBO}; {@link #test2()} segments a sample keyword with
 * {@code IKSegmentation} and queries that index.
 *
 * @author linly
 */
public class IKAnalyzerDemo {
    /** Location of the on-disk index, relative to the working directory. */
    static final File INDEX_DIR = new File("index");

    /** Name of the single field every document is indexed under. */
    private static final String FIELD_NAME = "text";

    /**
     * Rebuilds the on-disk index: seeds an in-memory index with one sample
     * document, appends the titles returned by the database query, and merges
     * the in-memory index into the index at {@link #INDEX_DIR}.
     *
     * @throws Exception if the database query or any index operation fails
     */
    static void test() throws Exception {
        RAMDirectory ramDir = new RAMDirectory();
        IKAnalyzer ika = new IKAnalyzer();

        DBO newsdbo = DBOManager.createDBO("CMS34");

        IndexWriter iwriter = new IndexWriter(ramDir, ika, true, IndexWriter.MaxFieldLength.LIMITED);
        try {
            // Recreate the on-disk index (create == true). The in-memory index is
            // still empty at this point, so this merge adds no documents; the seed
            // document below reaches the disk through the merge inside the loop.
            // Merging after adding the seed here would store it on disk twice.
            mergeIntoFileIndex(ramDir, ika, true);

            Document seed = new Document();
            seed.add(new Field(FIELD_NAME, "1234", Field.Store.YES, Field.Index.ANALYZED));
            iwriter.addDocument(seed);
            iwriter.commit();
        } finally {
            iwriter.close();
        }

        // NOTE(review): only one page is processed. If the bound is ever raised,
        // the offset step (1000) and page size (5000) overlap, and ramDir keeps
        // earlier pages, so every merge would re-add them — revisit before changing.
        for (int i = 0; i < 1; i++) {
            int offset = 1000 * i;
            String sql = "select Title from News_tab limit " + offset + ", 5000 ";
            // Log the statement that is actually executed.
            System.out.println(sql);
            List<String> titles = newsdbo.getObjList(sql, String.class);

            iwriter = new IndexWriter(ramDir, ika, false, IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (String title : titles) {
                    if (title == null) {
                        // Field rejects a null value; skip rows without a title
                        // instead of aborting the whole indexing run.
                        continue;
                    }
                    Document doc = new Document();
                    doc.add(new Field(FIELD_NAME, title, Field.Store.YES, Field.Index.ANALYZED));
                    iwriter.addDocument(doc);
                }
            } finally {
                iwriter.close();
            }

            mergeIntoFileIndex(ramDir, ika, false);
        }
    }

    /**
     * Merges {@code source} into the on-disk index at {@link #INDEX_DIR} and
     * commits, closing the writer and the directory handle even on failure.
     *
     * @param source   index whose segments are added to the on-disk index
     * @param analyzer analyzer handed to the on-disk {@code IndexWriter}
     * @param create   {@code true} to create/overwrite the on-disk index,
     *                 {@code false} to append to the existing one
     * @throws IOException if the on-disk index cannot be opened or written
     */
    private static void mergeIntoFileIndex(Directory source, IKAnalyzer analyzer, boolean create)
            throws IOException {
        Directory fileDir = FSDirectory.open(INDEX_DIR);
        try {
            IndexWriter fileWriter =
                    new IndexWriter(fileDir, analyzer, create, IndexWriter.MaxFieldLength.LIMITED);
            try {
                fileWriter.addIndexesNoOptimize(new Directory[]{source});
                fileWriter.commit();
            } finally {
                fileWriter.close();
            }
        } finally {
            fileDir.close();
        }
    }

    /**
     * Segments a sample keyword with IK, builds a query from the kept lexemes,
     * searches the on-disk index at {@link #INDEX_DIR} and prints the hits.
     *
     * @throws Exception if the index cannot be opened or the search fails
     */
    static void test2() throws Exception {
        Directory indexDir = FSDirectory.open(INDEX_DIR);
        try {
            IndexSearcher isearcher = new IndexSearcher(indexDir);
            try {
                // Use the IKSimilarity scorer for this searcher.
                isearcher.setSimilarity(new IKSimilarity());

                String keyword = "铲除网上黄毒需法律亮剑";
                IKSegmentation ikSeg = new IKSegmentation(new StringReader(keyword), true);

                // Rebuild the keyword as a space-separated list of kept lexemes.
                StringBuilder terms = new StringBuilder();
                try {
                    Lexeme lexeme;
                    while ((lexeme = ikSeg.next()) != null) {
                        // Only lexemes of type 0 are kept — presumably the normal
                        // CJK word type; confirm against the Lexeme type constants.
                        if (lexeme.getLexemeType() == 0) {
                            System.out.println(lexeme.getLexemeText());
                            terms.append(lexeme.getLexemeText()).append(" ");
                        }
                    }
                } catch (IOException e) {
                    // Best effort: report the failure and search with whatever
                    // lexemes were collected so far.
                    e.printStackTrace();
                }

                // Build the Query with the IKQueryParser query analyzer.
                Query query = IKQueryParser.parse(FIELD_NAME, terms.toString());

                // Fetch the 20 highest-scoring records.
                int max = 20;
                TopDocs topDocs = isearcher.search(query, max);
                System.out.println("命中:" + topDocs.totalHits);

                // Print the hits; iterate over the returned array rather than
                // totalHits, which can exceed the number of documents returned.
                ScoreDoc[] scoreDocs = topDocs.scoreDocs;
                for (int i = 0; i < scoreDocs.length; i++) {
                    Document targetDoc = isearcher.doc(scoreDocs[i].doc);
                    System.out.println("内容:" + targetDoc.toString() + "\t\t" + targetDoc);
                }
            } finally {
                isearcher.close();
            }
        } finally {
            indexDir.close();
        }
    }

    /**
     * Runs the demo: rebuilds the index, then searches it.
     *
     * @param args unused
     * @throws Exception if indexing or searching fails
     */
    public static void main(String[] args) throws Exception {
        test();
        test2();
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值