Lucene创建索引

最新推荐文章于 2022-01-24 13:49:04 发布

常生果

最新推荐文章于 2022-01-24 13:49:04 发布

阅读量368

点赞数

分类专栏： Lucene

Lucene 专栏收录该内容

7 篇文章

订阅专栏

//Lucene创建索引
package com.lunces;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Minimal Lucene example that builds an on-disk index containing a handful of
 * sample documents.
 *
 * <p>Each document carries an {@code id}, a {@code name}, a tokenized
 * {@code title}, and a {@code dt} value that is written twice: once as a stored
 * string term and once as a numeric doc value.
 */
public class First {

    /**
     * Creates (or opens) the index under {@code E:\LuceneIndex}, adds the sample
     * documents, and prints the elapsed wall-clock time in milliseconds.
     *
     * @param args ignored
     * @throws IOException if the index directory cannot be opened or written
     */
    public static void main(String[] args) throws IOException {
        // nanoTime() is intended for measuring elapsed time; it is unaffected by
        // wall-clock adjustments.
        long startNanos = System.nanoTime();

        // Analyzer used while indexing. The original author notes that
        // StandardAnalyzer emits one token per Chinese character and that
        // third-party Chinese analyzers can be substituted here.
        Analyzer analyzer = new StandardAnalyzer();

        // Configuration for the index writer.
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        // Directory is the storage location of the index; FSDirectory keeps it on
        // the file system. Both the Directory and the IndexWriter (the core class
        // for adding, deleting and updating documents) are opened in
        // try-with-resources so that they are closed even when adding a document
        // fails. Resources are closed in reverse order: writer first, then directory.
        try (Directory dir = FSDirectory.open(Paths.get("E:\\LuceneIndex"));
                IndexWriter iw = new IndexWriter(dir, iwc)) {
            addDoc(iw, "1", "libiao1", "张三是中国人1", 20160101);
            addDoc(iw, "2", "libiao2", "李四是美国人2", 20160102);
            addDoc(iw, "3", "libiao3", "王五是中国人3", 20160103);
            addDoc(iw, "4", "libiao4", "马六是俄国人4", 20160104);
            addDoc(iw, "5", "libiao5", "赵七是中国人5", 20160105);
            addDoc(iw, "6", "qqqq", "苏八是中国人5", 20160106);
            addDoc(iw, "7", "bbbb", "我是中国人5", 20160107);
        }

        long elapsedMillis = (System.nanoTime() - startNanos) / 1_000_000L;
        // The measured value is in milliseconds, so label it "ms" (the previous
        // "s" suffix misreported the unit).
        System.out.println(elapsedMillis + "ms");
    }

    /**
     * Builds one document from the given values and adds it to the index.
     *
     * <p>Field choices:
     * <ul>
     *   <li>{@code StringField} - not tokenized; used here for {@code id},
     *       {@code name} and the string form of {@code dt}.</li>
     *   <li>{@code TextField} - tokenized by the analyzer; used for
     *       {@code title}.</li>
     *   <li>{@code NumericDocValuesField} - stores a {@code long}; used for
     *       {@code dt} so the value is available for sorting.</li>
     * </ul>
     *
     * @param iw    writer that receives the document
     * @param id    document identifier, stored verbatim
     * @param name  name value, stored verbatim
     * @param title title text, analyzed and stored
     * @param dt    date encoded as an integer (the callers pass yyyyMMdd-style
     *              values); must not be {@code null}
     * @throws IOException if the writer fails to add the document
     */
    private static void addDoc(IndexWriter iw, String id, String name, String title, Integer dt) throws IOException {
        Document doc = new Document();
        doc.add(new StringField("id", id, Store.YES));
        doc.add(new StringField("name", name, Store.YES));
        doc.add(new TextField("title", title, Store.YES));
        // Same field name, two representations: the string term is for querying,
        // the numeric doc value is for sorting.
        doc.add(new StringField("dt", dt.toString(), Store.YES));
        doc.add(new NumericDocValuesField("dt", dt.longValue()));

        iw.addDocument(doc);
    }

}