感谢这篇文章,让我一下就看懂了,牛皮:
搜索引擎技术系列教材 (四)- lucene - 向Lucene中导入14万条产品数据
==================================================================
内容:
14 万条原始数据存储在 TXT 文件中,读取后存入 List;Lucene 利用 List 中的数据建立索引,并将索引保存在 Directory(本例为内存中的 RAMDirectory)中,然后根据输入的关键字找出匹配度最高的 10 条数据。
主要代码:
package com.how2java;
import java.io.IOException;
import java.io.StringReader;
import java.util.List;
import java.util.Scanner;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.wltea.analyzer.lucene.IKAnalyzer;
public class TestLucene {
public static void main(String[] args) throws Exception {
// 1. 准备中文分词器
IKAnalyzer analyzer = new IKAnalyzer();
// 2. 索引
Directory index = createIndex(analyzer);
// 3. 查询器
Scanner s = new Scanner(System.in);
while (true) {
System.out.print("请输入查询关键字:");
String keyword = s.nextLine();
System.out.println("当前关键字是:" + keyword);
Query query = new QueryParser("name", analyzer).parse(keyword);
// 4. 搜索出10条相关的内容
IndexReader reader = DirectoryReader.open(index);
IndexSearcher searcher = new IndexSearcher(reader);
int numberPerPage = 10;
ScoreDoc[] hits = searcher.search(query, numberPerPage).scoreDocs;
// 5. 显示查询结果
showSearchResults(searcher, hits, query, analyzer);
// 6. 关闭查询
reader.close();
}
}
private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, IKAnalyzer analyzer) throws Exception {
System.out.println("找到 " + hits.length + " 个命中.");
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("", "");
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
System.out.println("找到 " + hits.length + " 个命中.");
System.out.println("序号t匹配度得分t结果");
for (int i = 0; i < hits.length; ++i) {
ScoreDoc scoreDoc = hits[i];
int docId = scoreDoc.doc;
document d = searcher.doc(docId);
List fields = d.getFields();
//命中的第几条
System.out.print((i + 1));
//匹配度得分
System.out.print("t" + scoreDoc.score);
System.out.print("内容");
//命中的内容详情
for (IndexableField f : fields) {
if ("name".equals(f.name())) {
TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
System.out.print("t" + fieldContent);
} else {
System.out.print("t" + d.get(f.name()));
}
}
System.out.println("
");
}
}
private static Directory createIndex(IKAnalyzer analyzer) throws IOException {
Directory index = new RAMDirectory();
IndexWriterConfig config = new IndexWriterConfig(analyzer);
IndexWriter writer = new IndexWriter(index, config);
String fileName = "E:\ideaMyProject\demo-LUCENE\demo3_14w\140k_products.txt";
//14万条数据存入list集合中
List products = ProductUtil.file2list(fileName);
int total = products.size();
int count = 0;
int per = 0;
int oldPer = 0;
for (Product p : products) {
//创建索引
addDoc(writer, p);
count++;
per = count * 100 / total;
if (per != oldPer) {
oldPer = per;
System.out.printf("索引中,总共要添加 %d 条记录,当前添加进度是: %d%% %n", total, per);
}
}
writer.close();
return index;
}
private static void addDoc(IndexWriter w, Product p) throws IOException {
document doc = new document();
doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
doc.add(new TextField("name", p.getName(), Field.Store.YES));
doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
doc.add(new TextField("code", p.getCode(), Field.Store.YES));
w.adddocument(doc);
}
}
测试查找"鞋子"相关的10条数据,神奇~



