Skip to content

Commit 4339ac7

Browse files
feat: leadership transfer test and some changes to devstack and devnet-sdk (ethereum-optimism#16219)
* chore: more enhancements to fixDevnetEnv and parse hex or dec chain id Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * chore: make faucet and funders to be optional Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * feat: support for conductor recognition in l2 network Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * feat: leadership transfer test Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * chore: to not break existing devnet-envs Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * lint :3 Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * chore: address comments Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * fix: allow conductor absence for backwards compatibility Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * chore: address comments Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * chore: new preset and gate for leadership transfer test, and support for testing conductor sets across multiple chains Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * nit Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * address comments and skip conductor sysgo tests Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * skip conductor tests until 5th july Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * nit: add a todo Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * clean up test skipability of conductor leadership transfer Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * nit Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * change skip tags Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> * nit Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co> --------- Signed-off-by: Yashvardhan Kukreja <yashvardhan@oplabs.co>
1 parent 8361c36 commit 4339ac7

18 files changed

Lines changed: 500 additions & 11 deletions

File tree

‎Makefile‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ go-tests-short-ci: ## Runs short Go tests with gotestsum for CI (assumes deps bu
293293
--rerun-fails=3 \
294294
--rerun-fails-max-failures=50 \
295295
--packages="$(TEST_PKGS) $(RPC_TEST_PKGS) $(FRAUD_PROOF_TEST_PKGS)" \
296-
-- -parallel=$$PARALLEL -coverprofile=coverage.out -short -timeout=$(TEST_TIMEOUT)
296+
-- -parallel=$$PARALLEL -coverprofile=coverage.out -short -timeout=$(TEST_TIMEOUT) -tags="ci"
297297
.PHONY: go-tests-short-ci
298298

299299
go-tests-ci: ## Runs comprehensive Go tests with gotestsum for CI (assumes deps built by CI)
@@ -308,7 +308,7 @@ go-tests-ci: ## Runs comprehensive Go tests with gotestsum for CI (assumes deps
308308
--rerun-fails=3 \
309309
--rerun-fails-max-failures=50 \
310310
--packages="$(TEST_PKGS) $(RPC_TEST_PKGS) $(FRAUD_PROOF_TEST_PKGS)" \
311-
-- -parallel=$$PARALLEL -coverprofile=coverage.out -timeout=$(TEST_TIMEOUT)
311+
-- -parallel=$$PARALLEL -coverprofile=coverage.out -timeout=$(TEST_TIMEOUT) -tags="ci"
312312
.PHONY: go-tests-ci
313313

314314
go-tests-fraud-proofs-ci: ## Runs fraud proofs Go tests with gotestsum for CI (assumes deps built by CI)

‎devnet-sdk/shell/env/devnet.go‎

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/ethereum-optimism/optimism/devnet-sdk/controller/kt"
1111
"github.com/ethereum-optimism/optimism/devnet-sdk/controller/surface"
1212
"github.com/ethereum-optimism/optimism/devnet-sdk/descriptors"
13+
"github.com/ethereum-optimism/optimism/op-node/rollup"
1314
"github.com/ethereum/go-ethereum/params"
1415
)
1516

@@ -131,15 +132,46 @@ func (d *DevnetEnv) Control() (surface.ControlSurface, error) {
131132

132133
func fixupDevnetConfig(config *descriptors.DevnetEnvironment) error {
133134
// we should really get this from the kurtosis output, but the data doesn't exist yet, so craft a minimal one.
135+
l1ID := new(big.Int)
136+
l1ID, ok := l1ID.SetString(config.L1.ID, 10)
137+
if !ok {
138+
return fmt.Errorf("invalid L1 ID: %s", config.L1.ID)
139+
}
134140
if config.L1.Config == nil {
135-
l1ID := new(big.Int)
136-
l1ID, ok := l1ID.SetString(config.L1.ID, 10)
137-
if !ok {
138-
return fmt.Errorf("invalid L1 ID: %s", config.L1.ID)
139-
}
140141
config.L1.Config = &params.ChainConfig{
141142
ChainID: l1ID,
142143
}
143144
}
145+
for _, l2Chain := range config.L2 {
146+
l2ChainId := l2Chain.Chain.ID
147+
148+
var l2ID *big.Int
149+
base := 10
150+
if len(l2ChainId) >= 2 && l2ChainId[:2] == "0x" {
151+
base = 16
152+
l2ChainId = l2ChainId[2:]
153+
}
154+
155+
l2ID, ok := new(big.Int).SetString(l2ChainId, base)
156+
if !ok {
157+
return fmt.Errorf("invalid L2 ID: %s", l2ChainId)
158+
}
159+
// Convert the L2 chain ID to decimal string format
160+
decimalId := l2ID.String()
161+
l2Chain.Chain.ID = decimalId
162+
163+
if l2Chain.Config == nil {
164+
l2Chain.Config = &params.ChainConfig{
165+
ChainID: l2ID,
166+
}
167+
}
168+
169+
if l2Chain.RollupConfig == nil {
170+
l2Chain.RollupConfig = &rollup.Config{
171+
L1ChainID: l1ID,
172+
L2ChainID: l2ID,
173+
}
174+
}
175+
}
144176
return nil
145177
}

‎devnet-sdk/system/chain.go‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,12 @@ func (c *chain) ID() types.ChainID {
140140
if c.id == "" {
141141
return types.ChainID(big.NewInt(0))
142142
}
143-
id, ok := new(big.Int).SetString(c.id, 10)
143+
base := 10
144+
if len(c.id) >= 2 && c.id[0:2] == "0x" {
145+
c.id = c.id[2:]
146+
base = 16
147+
}
148+
id, ok := new(big.Int).SetString(c.id, base)
144149
if !ok {
145150
return types.ChainID(big.NewInt(0))
146151
}

‎kurtosis-devnet/pkg/kurtosis/api/engine/engine.go‎

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,16 @@ func NewEngineManager(opts ...Option) *EngineManager {
4848
// EnsureRunning starts the Kurtosis engine with the configured version
4949
func (e *EngineManager) EnsureRunning() error {
5050
cmd := exec.Command(e.kurtosisBinary, "engine", "start", "--version", e.version)
51+
fmt.Println("Starting Kurtosis engine with version:", e.version)
52+
53+
// Capture stdout and stderr for more verbose output
54+
var stdout, stderr strings.Builder
55+
cmd.Stdout = &stdout
56+
cmd.Stderr = &stderr
57+
5158
if err := cmd.Run(); err != nil {
52-
return fmt.Errorf("failed to start kurtosis engine: %w", err)
59+
return fmt.Errorf("failed to start kurtosis engine: %w\nstdout: %s\nstderr: %s",
60+
err, stdout.String(), stderr.String())
5361
}
5462
return nil
5563
}

‎op-acceptance-tests/acceptance-tests.yaml‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77

88

99
gates:
10+
- id: conductor
11+
description: "Sanity/smoke acceptance tests for networks with conductors."
12+
tests:
13+
- package: github.com/ethereum-optimism/optimism/op-acceptance-tests/tests/base/conductor
14+
timeout: 10m
15+
1016
- id: base
1117
description: "Sanity/smoke acceptance tests for all networks."
1218
tests:
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package conductor
2+
3+
import (
4+
"testing"
5+
6+
"github.com/ethereum-optimism/optimism/op-devstack/presets"
7+
)
8+
9+
func TestMain(m *testing.M) {
10+
presets.DoMain(m, presets.WithMinimalWithConductors())
11+
}
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
//go:build !ci
2+
3+
// The build constraint above is negated ("!ci") on purpose: this file is built and run by default, i.e. whenever the go toolchain (go test) is invoked without a tag filter, and it is excluded only when the "ci" tag is passed.
4+
package conductor
5+
6+
import (
7+
"context"
8+
"fmt"
9+
"strings"
10+
"testing"
11+
"time"
12+
13+
"github.com/ethereum-optimism/optimism/op-conductor/consensus"
14+
"github.com/ethereum-optimism/optimism/op-devstack/devtest"
15+
"github.com/ethereum-optimism/optimism/op-devstack/dsl"
16+
"github.com/ethereum-optimism/optimism/op-devstack/presets"
17+
"github.com/ethereum-optimism/optimism/op-devstack/stack"
18+
"github.com/ethereum-optimism/optimism/op-service/testlog"
19+
"github.com/ethereum/go-ethereum/log"
20+
"github.com/stretchr/testify/require"
21+
)
22+
23+
type conductorWithInfo struct {
24+
*dsl.Conductor
25+
info consensus.ServerInfo
26+
}
27+
28+
// TestConductorLeadershipTransfer checks if the leadership transfer works correctly on the conductors
29+
func TestConductorLeadershipTransfer(gt *testing.T) {
30+
t := devtest.SerialT(gt)
31+
logger := testlog.Logger(t, log.LevelInfo).With("Test", "TestConductorLeadershipTransfer")
32+
33+
sys := presets.NewMinimalWithConductors(t)
34+
tracer := t.Tracer()
35+
ctx := t.Ctx()
36+
logger.Info("Started Conductor Leadership Transfer test")
37+
38+
ctx, span := tracer.Start(ctx, "test chains")
39+
defer span.End()
40+
41+
ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
42+
defer cancel()
43+
44+
// Test all L2 chains in the system
45+
for l2Chain, conductors := range sys.ConductorSets {
46+
chainId := l2Chain.String()
47+
48+
_, span = tracer.Start(ctx, fmt.Sprintf("test chain %s", chainId))
49+
defer span.End()
50+
51+
membership := conductors[0].FetchClusterMembership()
52+
require.Equal(t, len(membership.Servers), len(conductors), "cluster membership does not match the number of conductors", "chainId", chainId)
53+
54+
idToConductor := make(map[string]conductorWithInfo)
55+
for _, conductor := range conductors {
56+
conductorId := strings.TrimPrefix(conductor.String(), stack.ConductorKind.String()+"-")
57+
idToConductor[conductorId] = conductorWithInfo{conductor, consensus.ServerInfo{}}
58+
}
59+
for _, memberInfo := range membership.Servers {
60+
conductor, ok := idToConductor[memberInfo.ID]
61+
require.True(t, ok, "unknown conductor in cluster membership", "unknown conductor id", memberInfo.ID, "chainId", chainId)
62+
conductor.info = memberInfo
63+
idToConductor[memberInfo.ID] = conductor
64+
}
65+
66+
leaderInfo, err := conductors[0].Escape().RpcAPI().LeaderWithID(ctx)
67+
require.NoError(t, err, "failed to get current conductor info", "chainId", chainId)
68+
69+
leaderConductor := idToConductor[leaderInfo.ID]
70+
71+
voters := []conductorWithInfo{leaderConductor}
72+
for _, member := range membership.Servers {
73+
if member.ID == leaderInfo.ID || member.Suffrage == consensus.Nonvoter {
74+
continue
75+
}
76+
77+
voters = append(voters, idToConductor[member.ID])
78+
}
79+
80+
if len(voters) == 1 {
81+
t.Skip("only one voter found in the cluster, skipping leadership transfer test")
82+
continue
83+
}
84+
85+
t.Run(fmt.Sprintf("L2_Chain_%s", chainId), func(tt devtest.T) {
86+
numOfLeadershipTransfers := len(voters)
87+
for i := 0; i < numOfLeadershipTransfers; i++ {
88+
// the modulo operation is used to wrap around the list of voters whenever i or i+1 becomes >= len(voters)
89+
oldLeaderIndex, newLeaderIndex := i%len(voters), (i+1)%len(voters)
90+
oldLeader, newLeader := voters[oldLeaderIndex], voters[newLeaderIndex]
91+
92+
time.Sleep(3 * time.Second)
93+
94+
testTransferLeadershipAndCheck(t, oldLeader, newLeader)
95+
}
96+
})
97+
}
98+
}
99+
100+
// testTransferLeadershipAndCheck tests conductor's leadership transfer from one leader to another
101+
func testTransferLeadershipAndCheck(t devtest.T, oldLeader, targetLeader conductorWithInfo) {
102+
103+
t.Run(fmt.Sprintf("Conductor_%s_to_%s", oldLeader, targetLeader), func(tt devtest.T) {
104+
// ensure that the current and target leader are healthy and unpaused before transferring leadership
105+
require.True(tt, oldLeader.FetchSequencerHealthy(), "current leader's sequencer is not healthy, id", oldLeader)
106+
require.True(tt, targetLeader.FetchSequencerHealthy(), "target leader's sequencer is not healthy, id", targetLeader)
107+
require.False(tt, oldLeader.FetchPaused(), "current leader's sequencer is paused, id", oldLeader)
108+
require.False(tt, targetLeader.FetchPaused(), "target leader's sequencer is paused, id", targetLeader)
109+
110+
// ensure that the current leader is the leader before transferring leadership
111+
require.True(tt, oldLeader.IsLeader(), "current leader was not found to be the leader")
112+
require.False(tt, targetLeader.IsLeader(), "target leader was already found to be the leader")
113+
114+
oldLeader.TransferLeadershipTo(targetLeader.info)
115+
116+
require.Eventually(
117+
tt,
118+
func() bool { return targetLeader.IsLeader() },
119+
5*time.Second, 1*time.Second, "target leader was not found to be the leader",
120+
)
121+
122+
require.False(tt, oldLeader.IsLeader(), "old leader was still found to be the leader")
123+
124+
// sometimes leadership transfer can cause a very brief period of unhealthiness,
125+
// but eventually, they should be healthy again
126+
require.Eventually(
127+
tt,
128+
func() bool { return oldLeader.FetchSequencerHealthy() && targetLeader.FetchSequencerHealthy() },
129+
3*time.Second, 1*time.Second, "at least one of the sequencers was found to be unhealthy",
130+
)
131+
})
132+
}

‎op-devstack/dsl/conductor.go‎

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package dsl
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/ethereum-optimism/optimism/op-conductor/consensus"
8+
"github.com/ethereum-optimism/optimism/op-devstack/stack"
9+
"github.com/ethereum-optimism/optimism/op-service/retry"
10+
)
11+
12+
type ConductorSet []*Conductor
13+
14+
func NewConductorSet(inner []stack.Conductor) ConductorSet {
15+
conductors := make([]*Conductor, len(inner))
16+
for i, c := range inner {
17+
conductors[i] = NewConductor(c)
18+
}
19+
return conductors
20+
}
21+
22+
type Conductor struct {
23+
commonImpl
24+
inner stack.Conductor
25+
}
26+
27+
func NewConductor(inner stack.Conductor) *Conductor {
28+
return &Conductor{
29+
commonImpl: commonFromT(inner.T()),
30+
inner: inner,
31+
}
32+
}
33+
34+
func (c *Conductor) String() string {
35+
return c.inner.ID().String()
36+
}
37+
38+
func (c *Conductor) Escape() stack.Conductor {
39+
return c.inner
40+
}
41+
42+
func (c *Conductor) FetchClusterMembership() *consensus.ClusterMembership {
43+
c.log.Debug("Fetching cluster membership")
44+
ctx, cancel := context.WithTimeout(c.ctx, DefaultTimeout)
45+
defer cancel()
46+
clusterMembership, err := retry.Do(ctx, 2, retry.Fixed(500*time.Millisecond), func() (*consensus.ClusterMembership, error) {
47+
clusterMembership, err := c.inner.RpcAPI().ClusterMembership(c.ctx)
48+
return clusterMembership, err
49+
})
50+
c.require.NoError(err, "Failed to fetch cluster membership")
51+
c.log.Info("Fetched cluster membership",
52+
"clusterMembership", clusterMembership)
53+
return clusterMembership
54+
}
55+
56+
func (c *Conductor) FetchLeader() *consensus.ServerInfo {
57+
c.log.Debug("Fetching leader information")
58+
ctx, cancel := context.WithTimeout(c.ctx, DefaultTimeout)
59+
defer cancel()
60+
leaderInfo, err := retry.Do[*consensus.ServerInfo](ctx, 2, retry.Fixed(500*time.Millisecond), func() (*consensus.ServerInfo, error) {
61+
leaderInfo, err := c.inner.RpcAPI().LeaderWithID(c.ctx)
62+
return leaderInfo, err
63+
})
64+
c.require.NoError(err, "Failed to fetch leader information")
65+
c.log.Info("Fetched leader information",
66+
"leaderInfo", leaderInfo)
67+
return leaderInfo
68+
}
69+
70+
func (c *Conductor) FetchSequencerHealthy() bool {
71+
c.log.Debug("Fetching sequencer healthy status")
72+
ctx, cancel := context.WithTimeout(c.ctx, DefaultTimeout)
73+
defer cancel()
74+
healthy, err := c.inner.RpcAPI().SequencerHealthy(ctx)
75+
c.require.NoError(err, "Failed to fetch sequencer healthy status")
76+
c.log.Info("Fetched sequencer healthy status", "healthy", healthy)
77+
return healthy
78+
}
79+
80+
func (c *Conductor) FetchPaused() bool {
81+
c.log.Debug("Fetching paused status")
82+
ctx, cancel := context.WithTimeout(c.ctx, DefaultTimeout)
83+
defer cancel()
84+
paused, err := c.inner.RpcAPI().Paused(ctx)
85+
c.require.NoError(err, "Failed to fetch paused status")
86+
c.log.Info("Fetched paused status", "paused", paused)
87+
return paused
88+
}
89+
90+
func (c *Conductor) IsLeader() bool {
91+
c.log.Debug("Checking if conductor is leader")
92+
ctx, cancel := context.WithTimeout(c.ctx, DefaultTimeout)
93+
defer cancel()
94+
leader, err := c.inner.RpcAPI().Leader(ctx)
95+
c.require.NoError(err, "Failed to check if conductor is leader")
96+
c.log.Info("Checked if conductor is leader", "leader", leader)
97+
return leader
98+
}
99+
100+
func (c *Conductor) TransferLeadershipTo(targetLeaderInfo consensus.ServerInfo) {
101+
c.log.Debug("Transferring leadership to target leader", "targetLeaderID", targetLeaderInfo.ID, "targetLeaderAddr", targetLeaderInfo.Addr)
102+
ctx, cancel := context.WithTimeout(c.ctx, DefaultTimeout)
103+
defer cancel()
104+
err := c.inner.RpcAPI().TransferLeaderToServer(ctx, targetLeaderInfo.ID, targetLeaderInfo.Addr)
105+
c.require.NoError(err, "Failed to transfer leadership to target leader", "targetLeaderID", targetLeaderInfo.ID)
106+
c.log.Info("Transferred leadership to target leader", "targetLeaderID", targetLeaderInfo.ID)
107+
}

0 commit comments

Comments
 (0)