org.elasticsearch.search.SearchService createContext 创建
org.elasticsearch.search.SearchService parseSource
org.elasticsearch.search.aggregations.AggregatorFactories build 执行
org.elasticsearch.search.aggregations.AggregationPhase preProcess
org.elasticsearch.search.aggregations.AggregatorFactory create
org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory createInternal
//这个方法会触发全局序号的生成
TermsAggregatorFactory.doCreateInternal。
//获取globalValueCount决定是否global_ordinals_low_cardinality, global_ordinals_low_cardinality中又因不是ValuesSource.Bytes.FieldData,创建global_ordinals。
ValuesSource$WithOrdinals.globalMaxOrd。
//通过获取一个segment的globalOrdinals,触发如果cache中没有一个shardId+field对应的globalOrdinals,load 所有segment ord,建立global ords。
ValuesSource$FieldData.globalOrdinalsValues。
SortedSetDVOrdinalsIndexFieldData.loadGlobal。
IndicesFieldDataCache$IndexFieldCache.load
SortedSetDVOrdinalsIndexFieldData.localGlobalDirect。
GlobalOrdinalsBuilder.build。
//globalOrdinals主要类
GlobalOrdinalsIndexFieldData。
MultiDocValues$OrdinalMap
OrdinalMap生成的一些数据结构
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
globalOrd和segmentOrd的差值
final PackedLongValues globalOrdDeltas;
// globalOrd -> first segment container
globalOrd到第一次出现的segment的映射
final PackedLongValues firstSegments;
// for every segment, segmentOrd -> globalOrd
存的是每个segment和全局编号的映射
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
// 两种segment之间的映射
final SegmentMap segmentMap;
org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData 的load函数返回全局的FieldData
@Override
public AtomicOrdinalsFieldData load(LeafReaderContext context) {
    // Returns the per-segment field data wrapped so that its segment-local
    // ordinals can be rewritten to global ordinals via the shared OrdinalMap.
    // The cached global structures were built against `source`; the incoming
    // leaf context must belong to that same top-level reader.
    assert source.getReaderCacheHelper().getKey() == context.parent.reader().getReaderCacheHelper().getKey();
    return new AbstractAtomicOrdinalsFieldData(scriptFunction) {
        @Override
        public SortedSetDocValues getOrdinalsValues() {
            final SortedSetDocValues values = segmentAfd[context.ord].getOrdinalsValues();
            if (values.getValueCount() == ordinalMap.getValueCount()) {
                // Fast path: this segment contains every term of the index,
                // so its segment ordinals already equal the global ordinals.
                return values;
            }
            // Slow path: remap segment ordinal -> global ordinal on the fly
            // through the OrdinalMap (see GlobalOrdinalMapping).
            final TermsEnum[] atomicLookups = getOrLoadTermsEnums();
            return new GlobalOrdinalMapping(ordinalMap, values, atomicLookups, context.ord);
        }
        @Override
        public long ramBytesUsed() {
            // Only the underlying segment-level field data is accounted for here;
            // the shared OrdinalMap is accounted elsewhere.
            return segmentAfd[context.ord].ramBytesUsed();
        }
        @Override
        public Collection getChildResources() {
            return segmentAfd[context.ord].getChildResources();
        }
        @Override
        public void close() {} // this wrapper owns no resources of its own
    };
}
@Override
public SortedSetDocValues ordinals(ValuesHolder values) {
    // Single-valued fields go through Lucene's singleton doc-values wrapper;
    // only genuinely multi-valued fields need the MultiDocs view.
    if (multiValued == false) {
        return (SortedSetDocValues) DocValues.singleton(new SingleDocs(this, values));
    }
    return new MultiDocs(this, values);
}
@Override
public SortedSetDocValues getOrdinalsValues() {
    // The holder resolves an ordinal back to its term bytes
    // (offset lookup into the paged term-bytes buffer).
    final ValuesHolder holder = new ValuesHolder(bytes, termOrdToBytesOffset);
    return ordinals.ordinals(holder);
}
text类型 org.elasticsearch.index.mapper.TextFieldMapper
使用的是PagedBytesIndexFieldData来创建的
@Override
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
    // Uninverting a text field is memory-hungry, so fielddata must be
    // explicitly enabled in the mapping before a builder is handed out.
    if (fielddata) {
        return new PagedBytesIndexFieldData.Builder(fielddataMinFrequency, fielddataMaxFrequency, fielddataMinSegmentSize);
    }
    throw new IllegalArgumentException("Fielddata is disabled on text fields by default. Set fielddata=true on [" + name()
        + "] in order to load fielddata in memory by uninverting the inverted index. Note that this can however "
        + "use significant memory. Alternatively use a keyword field instead.");
}
org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData loadDirect
此步会做df的过滤
TermsEnum termsEnum = estimator.beforeLoad(terms);
data = new PagedBytesAtomicFieldData(bytesReader, termOrdToBytesOffset.build(), ordinals);
bytesReader: 包含 所有的term
termOrdToBytesOffset: 保存term的ord到offset的映射
ordinals:保存docid 和 ord的关系
下面是docid和ord的关系
* Example for an index of 3 docs (O=ordinal, P = pointer)
* Level 0:
* ordinals [1] [4] [2]
* nextLevelSlices 2 0 1
* Level 1:
* ordinals [0 0] [2 0] [3 4]
* nextLevelSlices 0 0 1
* Level 2:
* ordinals [0 0 0 0] [5 0 0 0]
* nextLevelSlices 0 0
[]中的值代表ord, [1], [4] , [2]所在的位置代表docid。nextLevelSlices 所指的值代表指针,指向下一个level。
例如 docid 为1的文档,包含的ord值为1、3、4、5
org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData loadDirect
/**
 * Per-segment (leaf) field data for a text field, built by uninverting the
 * inverted index (see PagedBytesIndexFieldData#loadDirect). Holds three
 * structures: the concatenated term bytes, the term-ordinal-to-byte-offset
 * map, and the docId-to-ordinal(s) map.
 */
public class PagedBytesAtomicFieldData extends AbstractAtomicOrdinalsFieldData {

    /** All term bytes of this segment, concatenated into paged blocks. */
    private final PagedBytes.Reader bytes;
    /** term ordinal -> byte offset of that term's bytes inside {@code bytes}. */
    private final PackedLongValues termOrdToBytesOffset;
    /** docId -> term ordinal(s); see MultiOrdinals / SinglePackedOrdinals. */
    protected final Ordinals ordinals;

    public PagedBytesAtomicFieldData(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset, Ordinals ordinals) {
        // Fixed: the inherited constant is DEFAULT_SCRIPT_FUNCTION
        // (was mis-cased as DEFAULT_script_FUNCTION in the transcription).
        super(DEFAULT_SCRIPT_FUNCTION);
        this.bytes = bytes;
        this.termOrdToBytesOffset = termOrdToBytesOffset;
        this.ordinals = ordinals;
    }

    @Override
    public void close() {
        // All members are plain heap structures; nothing to release explicitly.
    }

    @Override
    public long ramBytesUsed() {
        // Sum of the three backing structures.
        long size = ordinals.ramBytesUsed();
        // PagedBytes
        size += bytes.ramBytesUsed();
        // PackedInts
        size += termOrdToBytesOffset.ramBytesUsed();
        return size;
    }

    @Override
    public Collection getChildResources() {
        List resources = new ArrayList<>();
        resources.add(Accountables.namedAccountable("ordinals", ordinals));
        resources.add(Accountables.namedAccountable("term bytes", bytes));
        resources.add(Accountables.namedAccountable("term offsets", termOrdToBytesOffset));
        return Collections.unmodifiableList(resources);
    }

    @Override
    public SortedSetDocValues getOrdinalsValues() {
        return ordinals.ordinals(new ValuesHolder(bytes, termOrdToBytesOffset));
    }

    /** Resolves a term ordinal back to its term bytes via the offset map. */
    private static class ValuesHolder implements Ordinals.ValuesHolder {

        // NOTE: shared scratch buffer — lookupOrd returns this same instance
        // on every call, so callers must copy the bytes if they retain them.
        private final BytesRef scratch = new BytesRef();
        private final PagedBytes.Reader bytes;
        private final PackedLongValues termOrdToBytesOffset;

        ValuesHolder(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset) {
            this.bytes = bytes;
            this.termOrdToBytesOffset = termOrdToBytesOffset;
        }

        @Override
        public BytesRef lookupOrd(long ord) {
            assert ord >= 0;
            bytes.fill(scratch, termOrdToBytesOffset.get(ord));
            return scratch;
        }
    }
}
org.elasticsearch.index.fielddata.ordinals.MultiOrdinals
@Override
public SortedSetDocValues ordinals(ValuesHolder values) {
    // `values` maps an ordinal back to its term bytes (offset + length + bytes).
    // Multi-valued docs need the MultiDocs view; single-valued docs are wrapped
    // in Lucene's singleton SortedSet adapter.
    return multiValued
            ? new MultiDocs(this, values)
            : (SortedSetDocValues) DocValues.singleton(new SingleDocs(this, values));
}
MultiOrdinals 的构造函数如下
public MultiOrdinals(OrdinalsBuilder builder, float acceptableOverheadRatio) {
    // Flatten the per-document ordinal lists produced by the builder into two
    // packed arrays: a concatenated ordinal stream, plus a monotonic array of
    // per-document end offsets into that stream.
    multiValued = builder.getNumMultiValuesDocs() > 0;
    valueCount = builder.getValueCount();
    final PackedLongValues.Builder offsetsOut = PackedLongValues.monotonicBuilder(OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
    final PackedLongValues.Builder ordsOut = PackedLongValues.packedBuilder(OFFSETS_PAGE_SIZE, acceptableOverheadRatio);
    long runningEnd = 0;
    for (int doc = 0; doc < builder.maxDoc(); ++doc) {
        final LongsRef docOrds = builder.docOrds(doc);
        for (int k = 0; k < docOrds.length; ++k) {
            ordsOut.add(docOrds.longs[docOrds.offset + k]);
        }
        runningEnd += docOrds.length;
        offsetsOut.add(runningEnd);
    }
    endOffsets = offsetsOut.build();
    ords = ordsOut.build();
    assert endOffsets.size() == builder.maxDoc();
    assert ords.size() == builder.getTotalNumOrds() : ords.size() + " != " + builder.getTotalNumOrds();
}
不论是MultiDocs还是SingleDocs 都包含
this.valueCount = (int) ordinals.valueCount;
this.endOffsets = ordinals.endOffsets; // ord结束的偏移量(docid为一组)
this.ords = ordinals.ords; // ords
this.values = values; // ord -> 具体值
org.elasticsearch.search.sort.FieldSortBuilder build
org.elasticsearch.percolator.PercolateQueryBuilder getForField
org.elasticsearch.index.mapper.MappedFieldType fielddataBuilder
org.elasticsearch.index.fielddata.IndexFieldDataService
public> IFD getForField(MappedFieldType fieldType, String fullyQualifiedIndexName) { final String fieldName = fieldType.name(); IndexFieldData.Builder builder = fieldType.fielddataBuilder(fullyQualifiedIndexName); IndexFieldDataCache cache; synchronized (this) { cache = fieldDataCaches.get(fieldName); if (cache == null) { String cacheType = indexSettings.getValue(INDEX_FIELDDATA_CACHE_KEY); if (FIELDDATA_CACHE_VALUE_NODE.equals(cacheType)) { cache = indicesFieldDataCache.buildIndexFieldDataCache(listener, index(), fieldName); } else if ("none".equals(cacheType)){ cache = new IndexFieldDataCache.None(); } else { throw new IllegalArgumentException("cache type not supported [" + cacheType + "] for field [" + fieldName + "]"); } fieldDataCaches.put(fieldName, cache); } } return (IFD) builder.build(indexSettings, fieldType, cache, circuitBreakerService, mapperService); }



