栏目分类:
子分类:
返回
名师互学网用户登录
快速导航关闭
当前搜索
当前分类
子分类
实用工具
热门搜索
名师互学网 > IT > 面试经验 > Java面试题

介绍一下Lucene建立索引的过程

Java面试题 更新时间: 发布时间: IT归档 最新发布 模块sitemap 名妆网 法律咨询 聚返吧 英语巴士网 伯小乐 网商动力

介绍一下Lucene建立索引的过程

代码如下:

1. package utils;
2.
3. import java.io.File;
4. import java.io.FileReader;
5. import java.io.IOException;
6. import java.io.Reader;
7.
8. import org.apache.lucene.analysis.standard.StandardAnalyzer;
9. import org.apache.lucene.document.document;
10. import org.apache.lucene.document.Field;
11. import org.apache.lucene.index.IndexWriter;
12.
13. public class Indexer {
14.
15. public int index(String indexDir, String dataDir) throws IOException
16. {
17. File indexDirFile = new File(indexDir);
18. File dataDirFile = new File(dataDir);
19. int numIndexed = index(indexDirFile, dataDirFile);
20. return 0;
21. }
22.
23. private int index(File indexDirFile, File dataDirFile) throws IOException {
24. if(!dataDirFile.exists() || !dataDirFile.isDirectory())
25. {
26. throw new IOException(dataDirFile + ” does not exist or is not a directory”);
27. }
28. IndexWriter writer = new IndexWriter(indexDirFile, new StandardAnalyzer(), true);
29. writer.setUseCompoundFile(false);
30. indexDirectory(writer, dataDirFile);
31.
32. int numIndexed = writer.docCount();
33. writer.optimize();
34. writer.close();
35. return numIndexed;
36. }
37.
38. private void indexDirectory(IndexWriter writer, File dataDirFile) throws IOException {
39. File[] files = dataDirFile.listFiles();
40. for(int i = 0; i
41. {
42. File f = files[i];
43. if(f.isDirectory())
44. {
45. indexDirectory(writer, f);
46. }else if(f.getName().endsWith(”.java”) || f.getName().endsWith(”.txt”))//需要索引的文件类型
47. {
48. indexFile(writer, f);
49. }
50.
51. }
52.
53. }
54.
55. private void indexFile(IndexWriter writer, File f) throws IOException {
56. if(f.isHidden() || !f.exists() || !f.canRead())
57. {
58. return;
59. }
60. System.out.println(”Indexing” + f.getCanonicalPath());
61. document doc = new document();
62. Reader txtReader = new FileReader(f);
63. doc.add(new Field(”path”,f.getCanonicalPath(),Field.Store.YES,Field.Index.UN_TOKENIZED));
64. doc.add(new Field(”contents”,txtReader));
65. doc.add(new Field(”name”,f.getName(),Field.Store.YES,Field.Index.UN_TOKENIZED));
66. writer.adddocument(doc);
67. }
68.
69. }
70.
71.

调用的代码如下:

1. String filesRepoDir = “C:/workspace-2.0″;//需要被索引的目录
2. String indexDir = “C:/apache-tomcat-6.0.18/webapps/index”;//存放索引的目录
3. Indexer indexer= new Indexer();
4. indexer.index(indexDir, filesRepoDir);

转载请注明:文章转载自 www.mshxw.com
本文地址:https://www.mshxw.com/it/265058.html
我们一直用心在做
关于我们 文章归档 网站地图 联系我们

版权所有 (c)2021-2022 MSHXW.COM

ICP备案号:晋ICP备2021003244-6号