Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/120974.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 120974
summary: Tweak `copy_to` handling in synthetic `_source` to account for nested objects
area: Mapping
type: bug
issues:
- 120831
Original file line number Diff line number Diff line change
Expand Up @@ -1602,6 +1602,74 @@ synthetic_source with copy_to pointing inside object:
hits.hits.2.fields:
c.copy: [ "100", "hello", "zap" ]

---
# Verifies that synthetic _source omits the copy_to destination (my_values.copy) when both the
# source fields and the destination live inside nested objects.
synthetic_source with copy_to inside nested object:
  - do:
      indices.create:
        index: test
        body:
          settings:
            index:
              mapping.source.mode: synthetic
          mappings:
            properties:
              name:
                type: keyword
              my_values:
                type: nested
                properties:
                  k:
                    type: keyword
                    copy_to: my_values.copy
                  second_level:
                    type: nested
                    properties:
                      k2:
                        type: keyword
                        copy_to: my_values.copy
                  # copy_to targets `my_values.copy`, so `copy` must be a child of `my_values`.
                  copy:
                    type: keyword
              # NOTE(review): nesting was reconstructed from flattened text; `dummy` is assumed
              # to be a root-level field - confirm against the original test file.
              dummy:
                type: keyword

  # Document 1: copy_to source directly inside the first-level nested object.
  - do:
      index:
        index: test
        id: 1
        refresh: true
        body:
          name: "A"
          my_values:
            k: "hello"

  # Document 2: copy_to source inside the second-level nested object.
  - do:
      index:
        index: test
        id: 2
        refresh: true
        body:
          name: "B"
          my_values:
            second_level:
              k2: "hello"

  - do:
      search:
        index: test
        sort: name

  # The expected _source contains only the originally indexed fields, without `my_values.copy`.
  - match:
      hits.hits.0._source:
        name: "A"
        my_values:
          k: "hello"
  - match:
      hits.hits.1._source:
        name: "B"
        my_values:
          second_level:
            k2: "hello"

---
synthetic_source with copy_to pointing to ambiguous field:
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,19 +367,6 @@ public final DocumentParserContext maybeCloneForArray(Mapper mapper) throws IOEx
return this;
}

/**
 * Returns a context for parsing a subtree whose source has already been recorded, so that parts of
 * that subtree are not recorded a second time. Applies to synthetic source only.
 *
 * @return a child context of {@link #parent()} flagged as having its source recorded when
 *         {@link #canAddIgnoredField()} is true; otherwise this context unchanged
 * @throws IOException if creating the child context fails
 */
public final DocumentParserContext cloneWithRecordedSource() throws IOException {
    // Nothing to flag when ignored fields cannot be added: keep using the current context.
    if (canAddIgnoredField() == false) {
        return this;
    }
    DocumentParserContext recorded = createChildContext(parent());
    // Marking the child prevents double-storing parts of the source for the same parser subtree.
    recorded.setRecordedSource();
    return recorded;
}

/**
* Add the given {@code field} to the _field_names field
*
Expand Down Expand Up @@ -466,10 +453,6 @@ public boolean isCopyToDestinationField(String name) {
return copyToFields.contains(name);
}

/**
 * Returns the names of the fields tracked as {@code copy_to} destinations by this context.
 * This is the same set consulted by {@link #isCopyToDestinationField(String)}; it is returned
 * directly rather than as a copy.
 */
public Set<String> getCopyToFields() {
    return this.copyToFields;
}

/**
* Add a new mapper dynamically created while parsing.
*
Expand Down Expand Up @@ -706,6 +689,26 @@ public LuceneDocument doc() {
* @param doc the document to target
*/
public final DocumentParserContext createCopyToContext(String copyToField, LuceneDocument doc) throws IOException {
/*
Mark the field as containing copied data, meaning it should not be present
in synthetic _source (to be consistent with stored _source).
Ignored source values take precedence over the standard synthetic source implementation,
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
Otherwise, it would be constructed, e.g., from doc_values, which leads to duplicate values
in the copied field after reindexing.
*/
if (mappingLookup.isSourceSynthetic() && indexSettings().getSkipIgnoredSourceWrite() == false) {
ObjectMapper parent = root().findParentMapper(copyToField);
// There are scenarios when this is false:
// 1. all values of the field that is the source of copy_to are null
// 2. copy_to points at a field inside a disabled object
// 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly after the dynamic update
if (parent != null) {
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
ignoredFieldValues.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), doc));
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think getCopyToFields is no longer called? If so, remove it - and maybe inline markFieldAsCopyTo here and remove that method as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to this change, but also cloneWithRecordedSource(...) is not used.

Copy link
Contributor Author

@lkts lkts Jan 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

markFieldAsCopyTo is done after parsing, whereas this is done before, and I don't know whether that ordering is important. I won't experiment.


ContentPath path = new ContentPath();
XContentParser parser = DotExpandingXContentParser.expandDots(new CopyToParser(copyToField, parser()), path);
return new Wrapper(root(), this) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -161,33 +159,7 @@ public void postParse(DocumentParserContext context) {
return;
}

Collection<NameValue> ignoredValuesToWrite = context.getIgnoredFieldValues();
if (context.getCopyToFields().isEmpty() == false && indexSettings.getSkipIgnoredSourceWrite() == false) {
/*
Mark fields as containing copied data meaning they should not be present
in synthetic _source (to be consistent with stored _source).
Ignored source values take precedence over standard synthetic source implementation
so by adding the `XContentDataHelper.voidValue()` entry we disable the field in synthetic source.
Otherwise, it would be constructed f.e. from doc_values which leads to duplicate values
in copied field after reindexing.
*/
var mutableList = new ArrayList<>(ignoredValuesToWrite);
for (String copyToField : context.getCopyToFields()) {
ObjectMapper parent = context.parent().findParentMapper(copyToField);
if (parent == null) {
// There are scenarios when this can happen:
// 1. all values of the field that is the source of copy_to are null
// 2. copy_to points at a field inside a disabled object
// 3. copy_to points at dynamic field which is not yet applied to mapping, we will process it properly on re-parse.
continue;
}
int offset = parent.isRoot() ? 0 : parent.fullPath().length() + 1;
mutableList.add(new IgnoredSourceFieldMapper.NameValue(copyToField, offset, XContentDataHelper.voidValue(), context.doc()));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is this call to context.doc(): it returns the root document rather than the correct nested document.

}
ignoredValuesToWrite = mutableList;
}

for (NameValue nameValue : ignoredValuesToWrite) {
for (NameValue nameValue : context.getIgnoredFieldValues()) {
nameValue.doc().add(new StoredField(NAME, encode(nameValue)));
}
}
Expand Down