更新的答案
我们最终从原始答案中删除了自定义插件,因为很难使其在Elastic
Cloud中正常工作。相反,我们只是为自动填充创建了一个单独的文档,并将其从所有其他文档中删除了。
对象
/**
 * Wrapper document for autocomplete suggestions. It is indexed separately from all other
 * documents so the completion fields stay isolated from the regular mappings.
 */
public class Suggest {

    // Raw value read back from _source when a suggestion matches.
    private String autocompleteOutput;
    // Dynamic completion fields, keyed by field name; matched by the
    // "autoComplete.*" dynamic template in the index mapping below.
    private Map<String, AutoComplete> autoComplete;

    /** No-arg constructor for Jackson deserialization. */
    @JsonCreator
    Suggest() {
    }

    public Suggest(String autocompleteOutput, Map<String, AutoComplete> autoComplete) {
        this.autocompleteOutput = autocompleteOutput;
        this.autoComplete = autoComplete;
    }

    public String getAutocompleteOutput() {
        return autocompleteOutput;
    }

    public void setAutocompleteOutput(String autocompleteOutput) {
        this.autocompleteOutput = autocompleteOutput;
    }

    public Map<String, AutoComplete> getAutoComplete() {
        return autoComplete;
    }

    public void setAutoComplete(Map<String, AutoComplete> autoComplete) {
        this.autoComplete = autoComplete;
    }
}

/**
 * Value object holding the completion inputs for one autocomplete field.
 */
public class AutoComplete {

    // Suggestion inputs fed to the Elasticsearch completion field.
    private String[] input;

    /** No-arg constructor for Jackson deserialization. */
    @JsonCreator
    AutoComplete() {
    }

    public AutoComplete(String[] input) {
        // Defensive copy: later mutation of the caller's array must not change this instance.
        this.input = input == null ? null : input.clone();
    }

    public String[] getInput() {
        // Return a copy instead of exposing the internal mutable array.
        return input == null ? null : input.clone();
    }

    /**
     * Setter added for symmetry with {@link Suggest} and so Jackson can bind the
     * private field during deserialization.
     */
    public void setInput(String[] input) {
        this.input = input == null ? null : input.clone();
    }
}具有以下映射
{ "suggest": { "dynamic_templates": [ { "autocomplete": { "path_match": "autoComplete.*", "match_mapping_type": "*", "mapping": { "type": "completion", "analyzer": "lowercase_keyword_analyzer" } } } ], "properties": {} }}这使我们可以使用_source中的autocompleteOutput字段
原始答案
经过一番研究,我最终创建了一个新的Elasticsearch 5.1.1插件
创建一个lucene过滤器
import org.apache.lucene.analysis.TokenFilter;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;import java.io.IOException;import java.util.*;public class PermutationTokenFilter extends TokenFilter { private final CharTermAttribute charTermAtt; private final PositionIncrementAttribute posIncrAtt; private final OffsetAttribute offsetAtt; private Iterator<String> permutations; private int origOffset; protected PermutationTokenFilter(TokenStream input) { super(input); this.charTermAtt = addAttribute(CharTermAttribute.class); this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); this.offsetAtt = addAttribute(OffsetAttribute.class); } @Override public final boolean incrementToken() throws IOException { while (true) { //see if permutations have been created already if (permutations == null) { //see if more tokens are available if (!input.incrementToken()) { return false; } else { //Get value String value = String.valueOf(charTermAtt); //permute over buffer value and create iterator permutations = permutation(value).iterator(); origOffset = posIncrAtt.getPositionIncrement(); } } //see if there are remaining permutations if (permutations.hasNext()) { //Reset the attribute to starting point clearAttributes(); //use the next permutation String permutation = permutations.next(); //add te permutation to the attributes and remove old attributes charTermAtt.setEmpty().append(permutation); posIncrAtt.setPositionIncrement(origOffset); offsetAtt.setOffset(0,permutation.length()); //remove permutation from iterator permutations.remove(); origOffset = 0; return true; } permutations = null; } } private Set<String> permutation(String value) { value = value.trim().replaceAll(" +", " "); // Use sets 
to eliminate semantic duplicates (a a b is still a a b even if you switch the two 'a's in case one word occurs multiple times in a single value) // Switch to HashSet for better performance Set<String> set = new HashSet<String>(); String[] words = value.split(" "); // Termination condition: only 1 permutation for a array of 1 word if (words.length == 1) { set.add(value); } else if (words.length <= 6) { // Give each word a chance to be the first in the permuted array for (int i = 0; i < words.length; i++) { // Remove the word at index i from the array String pre = ""; for (int j = 0; j < i; j++) { pre += words[j] + " "; } String post = " "; for (int j = i + 1; j < words.length; j++) { post += words[j] + " "; } String remaining = (pre + post).trim(); // Recurse to find all the permutations of the remaining words for (String permutation : permutation(remaining)) { // Concatenate the first word with the permutations of the remaining words set.add(words[i] + " " + permutation); } } } else { Collections.addAll(set, words); set.add(value); } return set; }}该过滤器将采用原始输入令牌“所有可用颜色”并将其置换为所有可能的组合(请参阅原始问题)
创建工厂
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

/**
 * Factory that wires {@code PermutationTokenFilter} into Elasticsearch's analysis module.
 * The plugin registers it under the token-filter name "permutation".
 */
public class PermutationTokenFilterFactory extends AbstractTokenFilterFactory {

    public PermutationTokenFilterFactory(IndexSettings indexSettings, Environment environment,
                                         String name, Settings settings) {
        super(indexSettings, name, settings);
    }

    /** Wraps the given token stream with the permutation filter. */
    @Override
    public PermutationTokenFilter create(TokenStream input) {
        return new PermutationTokenFilter(input);
    }
}需要此类来为Elasticsearch插件提供过滤器。
创建Elasticsearch插件
请遵循本指南为Elasticsearch插件设置所需的配置。
<?xml version="1.0" encoding="UTF-8"?><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> <groupId>be.smartspoken</groupId> <artifactId>permutation-plugin</artifactId> <version>5.1.1-SNAPSHOT</version> <packaging>jar</packaging> <name>Plugin: Permutation</name> <description>Permutation plugin for elasticsearch</description> <properties> <lucene.version>6.3.0</lucene.version> <elasticsearch.version>5.1.1</elasticsearch.version> <java.version>1.8</java.version> <log4j2.version>2.7</log4j2.version> </properties> <dependencies> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-api</artifactId> <version>${log4j2.version}</version> </dependency> <dependency> <groupId>org.apache.logging.log4j</groupId> <artifactId>log4j-core</artifactId> <version>${log4j2.version}</version> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-test-framework</artifactId> <version>${lucene.version}</version> <scope>test</scope> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>${lucene.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-analyzers-common</artifactId> <version>${lucene.version}</version> <scope>provided</scope> </dependency> <dependency> <groupId>org.elasticsearch</groupId> <artifactId>elasticsearch</artifactId> <version>${elasticsearch.version}</version> <scope>provided</scope> </dependency> </dependencies> <build> <resources> <resource> <directory>src/main/resources</directory> <filtering>false</filtering> <excludes> <exclude>*.properties</exclude> </excludes> </resource> </resources> <plugins> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-assembly-plugin</artifactId> 
<version>2.6</version> <configuration> <appendAssemblyId>false</appendAssemblyId> <outputDirectory>${project.build.directory}/releases/</outputDirectory> <descriptors> <descriptor>${basedir}/src/main/assemblies/plugin.xml</descriptor> </descriptors> </configuration> <executions> <execution> <phase>package</phase> <goals> <goal>single</goal> </goals> </execution> </executions> </plugin> <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-compiler-plugin</artifactId> <version>3.3</version> <configuration> <source>${java.version}</source> <target>${java.version}</target> </configuration> </plugin> </plugins> </build></project>确保在pom.xml文件中使用正确的Elasticsearch,Lucene和Log4J(2)版本。并提供正确的配置文件
import be.smartspoken.plugin.permutation.filter.PermutationTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import java.util.HashMap;
import java.util.Map;

/**
 * Elasticsearch plugin entry point. Registers the custom "permutation" token filter
 * so it can be referenced from index analyzer definitions.
 */
public class PermutationPlugin extends Plugin implements AnalysisPlugin {

    /**
     * Exposes the additional token filters contributed by this plugin.
     *
     * @return a map from token-filter name to its factory provider
     */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
        filters.put("permutation", PermutationTokenFilterFactory::new);
        return filters;
    }
}向插件提供工厂。
安装新插件后,您需要重新启动Elasticsearch。
使用插件
添加一个新的自定义分析器,以"模仿" 2.x版本的功能
// Index settings defining two custom analyzers:
//  - "permutation_analyzer": keyword tokenizer + the plugin's "permutation" filter +
//    lowercase, used at index time to expand a value into all word-order permutations.
//  - "lowercase_keyword_analyzer": keyword tokenizer + lowercase only, used at search time
//    so the query itself is NOT permuted.
// NOTE(review): the second loadFromSource call appears to replace, not merge, the first
// "analysis" JSON object — confirm both analyzers actually end up in the settings.
Settings.builder()
    .put("number_of_shards", 2)
    .loadFromSource(jsonBuilder()
        .startObject()
            .startObject("analysis")
                .startObject("analyzer")
                    .startObject("permutation_analyzer")
                        .field("tokenizer", "keyword")
                        .field("filter", new String[]{"permutation","lowercase"})
                    .endObject()
                .endObject()
            .endObject()
        .endObject().string())
    .loadFromSource(jsonBuilder()
        .startObject()
            .startObject("analysis")
                .startObject("analyzer")
                    .startObject("lowercase_keyword_analyzer")
                        .field("tokenizer", "keyword")
                        .field("filter", new String[]{"lowercase"})
                    .endObject()
                .endObject()
            .endObject()
        .endObject().string())
    .build();现在,您要做的就是为对象映射提供自定义分析器
{ "my_object": { "dynamic_templates": [{ "autocomplete": { "path_match": "my.autocomplete.object.path", "match_mapping_type": "*", "mapping": { "type": "completion", "analyzer": "permutation_analyzer", "search_analyzer": "lowercase_keyword_analyzer" } } }], "properties": { } }}这也将提高性能,因为您不必再等待构建排列了。



