Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ endif::[]
* Added tests for Quarkus / RestEasy, adjusted vert.x router transaction name priority - {pull}1765[#1765]
* Added support for Spring WebMVC 6.x and Spring Boot 3.x - {pull}3094[#3094]
* Added `service.environment` to logs for service correlation - {pull}3115[#3115]
* Optimize agent overhead when an excessive number of spans is created with `trace_methods`, `@Traced` or `@CaptureSpan` - {pull}3151[#3151]

[float]
===== Bug fixes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,11 @@ public void decrementReferences() {
super.decrementReferences();
}

@Override
public boolean shouldSkipChildSpanCreation() {
return transaction != null && transaction.shouldSkipChildSpanCreation();
}

@Override
protected void recycle() {
tracer.recycle(this);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,20 @@ public DroppedSpanStats getDroppedSpanStats() {
return droppedSpanStats;
}

boolean isSpanLimitReached() {
@Override
public boolean shouldSkipChildSpanCreation() {
boolean drop = spanCount.isSpanLimitReached(maxSpans);
if (drop) {
// when dropping, the caller is expected to optimize and avoid span creation. As a consequence we have
// to artificially increase those counters to make it as if the span was actually created and dropped
// before reporting
spanCount.getTotal().incrementAndGet();
spanCount.getDropped().incrementAndGet();
}
return drop;
}

boolean isSpanLimitReached() {
return getSpanCount().isSpanLimitReached(maxSpans);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import co.elastic.apm.agent.bci.bytebuddy.SimpleMethodSignatureOffsetMappingFactory;
import co.elastic.apm.agent.configuration.CoreConfiguration;
import co.elastic.apm.agent.impl.ElasticApmTracer;
import co.elastic.apm.agent.sdk.logging.Logger;
import co.elastic.apm.agent.sdk.logging.LoggerFactory;
import co.elastic.apm.agent.tracer.GlobalTracer;
import co.elastic.apm.agent.impl.transaction.AbstractSpan;
import co.elastic.apm.agent.impl.transaction.Span;
Expand Down Expand Up @@ -126,6 +128,8 @@ public String getAdviceClassName() {

public static class TraceMethodAdvice {

public static final Logger logger = LoggerFactory.getLogger(TraceMethodAdvice.class);

private static final ElasticApmTracer tracer = GlobalTracer.get().require(ElasticApmTracer.class);
private static final long traceMethodThresholdMicros;

Expand All @@ -146,6 +150,11 @@ public static Object onMethodEnter(@Advice.Origin Class<?> clazz,
span.withName(signature).activate();
}
} else if (parent.isSampled()) {
if (parent.shouldSkipChildSpanCreation()) {
// span limit reached means span will not be reported, thus we can optimize and skip creating one
logger.debug("Not creating span for {} because span limit is reached.", signature);
return null;
}
span = parent.createSpan()
.withName(signature)
.activate();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,11 @@ protected void recycle() {
protected TestSpan thiz() {
return null;
}

@Override
public boolean shouldSkipChildSpanCreation() {
return false;
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@

import co.elastic.apm.agent.MockTracer;
import co.elastic.apm.agent.TransactionUtils;
import co.elastic.apm.agent.configuration.CoreConfiguration;
import co.elastic.apm.agent.impl.ElasticApmTracer;
import co.elastic.apm.agent.impl.metadata.MetaDataMock;
import co.elastic.apm.agent.impl.sampling.ConstantSampler;
import co.elastic.apm.agent.impl.stacktrace.StacktraceConfiguration;
import co.elastic.apm.agent.report.ApmServerClient;
import co.elastic.apm.agent.report.serialize.DslJsonSerializer;
import co.elastic.apm.agent.report.serialize.SerializationConstants;
import co.elastic.apm.agent.tracer.Outcome;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
Expand All @@ -36,6 +39,7 @@
import java.util.stream.Stream;

import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

public class TransactionTest {
Expand All @@ -44,6 +48,9 @@ public class TransactionTest {

@BeforeEach
void setUp() {
CoreConfiguration coreConfig = MockTracer.createRealTracer().getConfig(CoreConfiguration.class);
SerializationConstants.init(coreConfig);
Comment on lines +51 to +52
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[for reviewer] this was necessary as this test failed when run in isolation, most tests actually trigger this init through instrumentation init.


jsonSerializer = new DslJsonSerializer(
mock(StacktraceConfiguration.class),
mock(ApmServerClient.class),
Expand Down Expand Up @@ -119,6 +126,50 @@ static Stream<Arguments> typeTestArguments() {
);
}

@Test
void skipChildSpanCreationWhenLimitReached() {
int limit = 3;

ElasticApmTracer tracer = MockTracer.createRealTracer();
doReturn(limit).when(tracer.getConfig(CoreConfiguration.class)).getTransactionMaxSpans();

Transaction transaction = tracer.startRootTransaction(TransactionTest.class.getClassLoader());
assertThat(transaction).isNotNull();
transaction.activate();

int dropped = 7;
int total = limit + dropped;
for (int i = 1; i <= total; i++) {
// emulates an instrumentation that will bypass span creation
// each call to createSpan is expected to be guarded by a single call to shouldSkipChildSpanCreation
// for proper dropped span accounting.

boolean shouldSkip = transaction.shouldSkipChildSpanCreation();
assertThat(shouldSkip)
.describedAs("span %d should be skipped, limit is %d", i, limit)
.isEqualTo(i > limit);

if (shouldSkip) {
assertThat(transaction.getSpanCount().getReported().get()).isEqualTo(limit);
assertThat(transaction.getSpanCount().getDropped().get()).isEqualTo(i - limit);
} else {
transaction.createSpan()
.withName("child span " + i)
.activate()
.deactivate()
.end();

assertThat(transaction.getSpanCount().getReported().get()).isEqualTo(i);
}
}
assertThat(transaction.getSpanCount().getReported().get()).isEqualTo(limit);
assertThat(transaction.getSpanCount().getDropped().get()).isEqualTo(dropped);
assertThat(transaction.getSpanCount().getTotal().get()).isEqualTo(total);

transaction.deactivate().end();

}

/**
* A utility to enable arbitrary tests to set an existing {@link Transaction} state without making this functionality globally accessible
* @param recorded should the provided trace context be recorded
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,24 +86,31 @@ public static Object onMethodEnter(
) boolean discardable
) {
final AbstractSpan<?> parent = tracer.getActive();
if (parent != null) {
Span<?> span = asExit ? parent.createExitSpan() : parent.createSpan();
if (span == null) {
return null;
}

span.withName(spanName.isEmpty() ? signature : spanName)
.activate();
((co.elastic.apm.agent.impl.transaction.Span) span).setType(type, subtype, action);

if (!discardable) {
span.setNonDiscardable();
}
return span;
} else {
if (parent == null) {
logger.debug("Not creating span for {} because there is no currently active span.", signature);
return null;
}
return null;
if (parent.shouldSkipChildSpanCreation()) {
// span limit reached means span will not be reported, thus we can optimize and skip creating one
logger.debug("Not creating span for {} because span limit is reached.", signature);
return null;
}

Span<?> span = asExit ? parent.createExitSpan() : parent.createSpan();
if (span == null) {
return null;
}

span.withName(spanName.isEmpty() ? signature : spanName)
.activate();

// using deprecated API to keep compatibility with existing behavior
((co.elastic.apm.agent.impl.transaction.Span) span).setType(type, subtype, action);

if (!discardable) {
span.setNonDiscardable();
}
return span;
}

@Advice.OnMethodExit(suppress = Throwable.class, onThrowable = Throwable.class, inline = false)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
import co.elastic.apm.agent.impl.ElasticApmTracer;
import co.elastic.apm.agent.impl.stacktrace.StacktraceConfiguration;
import co.elastic.apm.agent.sdk.ElasticApmInstrumentation;
import co.elastic.apm.agent.sdk.logging.Logger;
import co.elastic.apm.agent.sdk.logging.LoggerFactory;
import co.elastic.apm.agent.tracer.AbstractSpan;
import co.elastic.apm.agent.tracer.GlobalTracer;
import co.elastic.apm.agent.tracer.Outcome;
Expand All @@ -46,10 +48,10 @@
import static co.elastic.apm.agent.bci.bytebuddy.CustomElementMatchers.isInAnyPackage;
import static co.elastic.apm.agent.bci.bytebuddy.CustomElementMatchers.isProxy;
import static co.elastic.apm.agent.bci.bytebuddy.CustomElementMatchers.overridesOrImplementsMethodThat;
import static co.elastic.apm.agent.tracer.AbstractSpan.PRIORITY_METHOD_SIGNATURE;
import static co.elastic.apm.agent.tracer.AbstractSpan.PRIORITY_USER_SUPPLIED;
import static co.elastic.apm.agent.pluginapi.ElasticApmApiInstrumentation.PUBLIC_API_INSTRUMENTATION_GROUP;
import static co.elastic.apm.agent.pluginapi.Utils.FRAMEWORK_NAME;
import static co.elastic.apm.agent.tracer.AbstractSpan.PRIORITY_METHOD_SIGNATURE;
import static co.elastic.apm.agent.tracer.AbstractSpan.PRIORITY_USER_SUPPLIED;
import static net.bytebuddy.matcher.ElementMatchers.declaresMethod;
import static net.bytebuddy.matcher.ElementMatchers.isAnnotatedWith;
import static net.bytebuddy.matcher.ElementMatchers.named;
Expand All @@ -59,6 +61,8 @@ public class TracedInstrumentation extends ElasticApmInstrumentation {

public static final Tracer tracer = GlobalTracer.get();

public static final Logger logger = LoggerFactory.getLogger(TracedInstrumentation.class);

private final CoreConfiguration coreConfig;
private final StacktraceConfiguration stacktraceConfig;

Expand Down Expand Up @@ -90,6 +94,11 @@ public static Object onMethodEnter(

final AbstractSpan<?> parent = tracer.getActive();
if (parent != null) {
if (parent.shouldSkipChildSpanCreation()) {
// span limit reached means span will not be reported, thus we can optimize and skip creating one
logger.debug("Not creating span for {} because span limit is reached.", signature);
return null;
}
Span<?> span = asExit ? parent.createExitSpan() : parent.createSpan();
if (span == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,17 @@ public interface AbstractSpan<T extends AbstractSpan<T>> extends ElasticContext<
void incrementReferences();

void decrementReferences();

/**
* @return {@literal true} when span limit is reached and the caller can optimize and not create a span. The caller
* is expected to call this method before every span creation operation for proper dropped spans accounting. If not
* called before attempting span creation, a span will be created and dropped before reporting.
* <br/>
* Expected caller behavior depends on the returned value:
* <ul>
* <li>{@literal true} returned means the caller is expected to NOT call {@link #createSpan()} or {@link #createExitSpan()}</li>
* <li>{@literal false} returned means the caller MAY call {@link #createSpan()} or {@link #createExitSpan()}</li>
* </ul>
*/
boolean shouldSkipChildSpanCreation();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my opinion it would be cleaner to instead alter the contract of createSpan() to allow to return null values for the cases where the tracer decides that no spans should be created anymore. This would allow this optimization to also take effect on our own instrumentations.

However, this would be a lot of work as we need to revisit all createSpan() usages in plugins and make sure that they properly handle null values and that there are no side effects of not creating spans, which is a bit much for this PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's exactly why I took this simple approach here, and it would have been quite consistent with createExitSpan that is expected to return null in case there is already an active exit span.

Another alternative would be to have a way to return a no-op span, which could avoid most of the refactoring but could still have some unexpected side-effects.

I think it could be worth investing a bit of time for a refactoring/tech-debt PR.

}