Skip to content
5 changes: 5 additions & 0 deletions docs/changelog/136147.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 136147
summary: Fixed geo point block loader slowness
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.apache.lucene.geo.LatLonGeometry;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
Expand All @@ -34,6 +35,7 @@
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.CheckedFunction;
import org.elasticsearch.geometry.Point;
import org.elasticsearch.geometry.utils.WellKnownBinary;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.fielddata.FieldDataContext;
Expand Down Expand Up @@ -62,6 +64,7 @@

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteOrder;
import java.util.Collections;
import java.util.List;
import java.util.Map;
Expand All @@ -70,6 +73,7 @@
import java.util.function.Function;

import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES;
import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE;

/**
* Field Mapper for geo_point types.
Expand Down Expand Up @@ -389,7 +393,7 @@ public static class GeoPointFieldType extends AbstractPointFieldType<GeoPoint> i
private final IndexMode indexMode;
private final boolean isSyntheticSource;

private GeoPointFieldType(
GeoPointFieldType(
String name,
IndexType indexType,
boolean stored,
Expand Down Expand Up @@ -546,28 +550,119 @@ public TimeSeriesParams.MetricType getMetricType() {

@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (blContext.fieldExtractPreference() == DOC_VALUES && hasDocValues()) {
return new BlockDocValuesReader.LongsBlockLoader(name());
// load from doc values
if (hasDocValues()) {
if (blContext.fieldExtractPreference() == DOC_VALUES) {
return new BlockDocValuesReader.LongsBlockLoader(name());
} else if (blContext.fieldExtractPreference() == NONE && isSyntheticSource) {
// when the preference is not explicitly set to DOC_VALUES, we expect a BytesRef -> see PlannerUtils.toElementType()
return new BytesRefFromLongsBlockLoader(name());
}
// if we got here, then either synthetic source is not enabled or the preference prohibits us from using doc_values
}

// There are two scenarios possible once we arrive here:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this comment still applies?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not entirely, this one was moved up:

            // - doc_values are enabled - _ignored_source field does not exist since we have doc_values. We will use
            // blockLoaderFromSource which reads "native" synthetic source.

I think the new comments are sufficient?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm, I think I only see the diff that removes those lines?

//
// * Stored source - we'll just use blockLoaderFromSource
// * Synthetic source. However, because of the fieldExtractPreference() check above it is still possible that doc_values are
// present here.
// So we have two subcases:
// - doc_values are enabled - _ignored_source field does not exist since we have doc_values. We will use
// blockLoaderFromSource which reads "native" synthetic source.
// - doc_values are disabled - we know that _ignored_source field is present and use a special block loader unless it's a multi
// field.
// doc_values are disabled, fallback to ignored_source, except for multi fields since then don't have fallback synthetic source
if (isSyntheticSource && hasDocValues() == false && blContext.parentField(name()) == null) {
return blockLoaderFromFallbackSyntheticSource(blContext);
}

// otherwise, load from _source (synthetic or otherwise) - very slow
return blockLoaderFromSource(blContext);
}
}

/**
* This is a GeoPoint-specific block loader that helps deal with an edge case where doc_values are available, yet
* FieldExtractPreference = NONE. When this happens, the BlockLoader sanity checker (see PlannerUtils.toElementType) expects a BytesRef.
* This implies that we need to load the value from _source. This however is very slow, especially when synthetic source is enabled.
* We're better off reading from doc_values and converting to BytesRef to satisfy the checker. This is what this block loader is for.
*/
static final class BytesRefFromLongsBlockLoader extends BlockDocValuesReader.DocValuesBlockLoader {

private final String fieldName;

BytesRefFromLongsBlockLoader(String fieldName) {
this.fieldName = fieldName;
}

@Override
public Builder builder(BlockFactory factory, int expectedCount) {
return factory.bytesRefs(expectedCount);
}

@Override
public AllReader reader(LeafReaderContext context) throws IOException {
SortedNumericDocValues docValues = context.reader().getSortedNumericDocValues(fieldName);
if (docValues != null) {
return new BytesRefsFromLong(docValues, (geoPointLong) -> {
GeoPoint gp = new GeoPoint().resetFromEncoded(geoPointLong);
byte[] wkb = WellKnownBinary.toWKB(new Point(gp.getX(), gp.getY()), ByteOrder.LITTLE_ENDIAN);
return new BytesRef(wkb);
});
}
return new ConstantNullsReader();
}
}

private static final class BytesRefsFromLong extends BlockDocValuesReader {

private final SortedNumericDocValues numericDocValues;
private final Function<Long, BytesRef> longsToBytesRef;

BytesRefsFromLong(SortedNumericDocValues numericDocValues, Function<Long, BytesRef> longsToBytesRef) {
this.numericDocValues = numericDocValues;
this.longsToBytesRef = longsToBytesRef;
}

@Override
protected int docId() {
return numericDocValues.docID();
}

@Override
public String toString() {
return "BlockDocValuesReader.BytesRefsFromLong";
}

@Override
public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset, boolean nullsFiltered)
throws IOException {

try (BlockLoader.BytesRefBuilder builder = factory.bytesRefs(docs.count() - offset)) {
for (int i = offset; i < docs.count(); i++) {
int doc = docs.get(i);
read(doc, builder);
}
return builder.build();
}
}

@Override
public void read(int docId, BlockLoader.StoredFields storedFields, BlockLoader.Builder builder) throws IOException {
read(docId, (BlockLoader.BytesRefBuilder) builder);
}

private void read(int doc, BlockLoader.BytesRefBuilder builder) throws IOException {
// no more values remaining
if (numericDocValues.advanceExact(doc) == false) {
builder.appendNull();
return;
}
int count = numericDocValues.docValueCount();
if (count == 1) {
BytesRef bytesRefValue = longsToBytesRef.apply(numericDocValues.nextValue());
builder.appendBytesRef(bytesRefValue);
return;
}
builder.beginPositionEntry();
for (int v = 0; v < count; v++) {
BytesRef bytesRefValue = longsToBytesRef.apply(numericDocValues.nextValue());
builder.appendBytesRef(bytesRefValue);
}
builder.endPositionEntry();
}
}

/** GeoPoint parser implementation */
static class GeoPointParser extends PointParser<GeoPoint> {
private final boolean storeMalformedDataForSyntheticSource;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,28 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.tests.geo.GeoTestUtil;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.geo.SimpleFeatureFactory;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.geometry.Point;
import org.elasticsearch.geometry.utils.WellKnownBinary;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.script.ScriptCompiler;

import java.io.IOException;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

public class GeoPointFieldTypeTests extends FieldTypeTestCase {

Expand Down Expand Up @@ -147,4 +155,142 @@ public void testFetchVectorTile() throws IOException {
assertThat(sourceValue.size(), equalTo(1));
assertThat(sourceValue.get(0), equalTo(featureFactory.points(geoPoints)));
}

public void testBlockLoaderWhenDocValuesAreEnabledAndThePreferenceIsToUseDocValues() {
// given
GeoPointFieldMapper.GeoPointFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("potato");
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.DOC_VALUES);

// when
BlockLoader loader = fieldType.blockLoader(blContextMock);

// then
// verify that we use the correct block value reader
assertThat(loader, instanceOf(BlockDocValuesReader.LongsBlockLoader.class));
}

public void testBlockLoaderWhenDocValuesAreEnabledAndThereIsNoPreference() {
// given
GeoPointFieldMapper.GeoPointFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("potato");
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.NONE);

// when
BlockLoader loader = fieldType.blockLoader(blContextMock);

// then
// verify that we use the correct block value reader
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
}

public void testBlockLoaderWhenFieldIsStoredAndThePreferenceIsToUseStoredFields() {
// given
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(true, false, false);
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.STORED);

// when
BlockLoader loader = fieldType.blockLoader(blContextMock);

// then
// verify that we use the correct block value reader
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
}

public void testBlockLoaderWhenFieldIsStoredAndThereIsNoPreference() {
// given
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(true, false, false);
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(false, MappedFieldType.FieldExtractPreference.NONE);

// when
BlockLoader loader = fieldType.blockLoader(blContextMock);

// then
// verify that we use the correct block value reader
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
}

public void testBlockLoaderWhenSyntheticSourceIsEnabledAndFieldIsStoredInIgnoredSource() {
// given
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(false, false, true);
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(true, MappedFieldType.FieldExtractPreference.NONE);

// when
BlockLoader loader = fieldType.blockLoader(blContextMock);

// then
// verify that we use the correct block value reader
assertThat(loader, instanceOf(FallbackSyntheticSourceBlockLoader.class));
}

public void testBlockLoaderWhenSyntheticSourceAndDocValuesAreEnabled() {
// given
GeoPointFieldMapper.GeoPointFieldType fieldType = createFieldType(false, true, true);
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(true, MappedFieldType.FieldExtractPreference.NONE);

// when
BlockLoader loader = fieldType.blockLoader(blContextMock);

// then
// verify that we use the correct block value reader
assertThat(loader, instanceOf(GeoPointFieldMapper.BytesRefFromLongsBlockLoader.class));
}

public void testBlockLoaderFallsBackToSource() {
// given
GeoPointFieldMapper.GeoPointFieldType fieldType = new GeoPointFieldMapper.GeoPointFieldType("potato");
MappedFieldType.BlockLoaderContext blContextMock = mockBlockLoaderContext(
false,
MappedFieldType.FieldExtractPreference.EXTRACT_SPATIAL_BOUNDS
);

// when
BlockLoader loader = fieldType.blockLoader(blContextMock);

// then
// verify that we use the correct block value reader
assertThat(loader, instanceOf(BlockSourceReader.GeometriesBlockLoader.class));
}

private MappedFieldType.BlockLoaderContext mockBlockLoaderContext(
boolean enableSyntheticSource,
MappedFieldType.FieldExtractPreference fieldExtractPreference
) {
Settings.Builder builder = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1);

if (enableSyntheticSource) {
builder.put("index.mapping.source.mode", "synthetic");
}

Settings settings = builder.build();

IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(builder).build(), settings);

MappedFieldType.BlockLoaderContext blContextMock = mock(MappedFieldType.BlockLoaderContext.class);
doReturn(fieldExtractPreference).when(blContextMock).fieldExtractPreference();
doReturn(indexSettings).when(blContextMock).indexSettings();

return blContextMock;
}

private GeoPointFieldMapper.GeoPointFieldType createFieldType(
boolean isStored,
boolean hasDocValues,
boolean isSyntheticSourceEnabled
) {
return new GeoPointFieldMapper.GeoPointFieldType(
"potato",
hasDocValues ? IndexType.docValuesOnly() : IndexType.NONE,
isStored,
null,
null,
null,
Collections.emptyMap(),
TimeSeriesParams.MetricType.COUNTER,
IndexMode.LOGSDB,
isSyntheticSourceEnabled
);
}

}
Loading