Skip to content

Commit

Permalink
Errant GTID detection fix for VTOrc (#17287)
Browse files Browse the repository at this point in the history
Signed-off-by: Manan Gupta <manan@planetscale.com>
  • Loading branch information
GuptaManan100 authored Nov 27, 2024
1 parent 8648264 commit 16fa7a3
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 5 deletions.
11 changes: 8 additions & 3 deletions go/vt/vtorc/inst/instance_dao.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ Cleanup:
// Add replication group ancestry UUID as well. Otherwise, VTOrc thinks there are errant GTIDs in group
// members and its replicas, even though they are not.
instance.AncestryUUID = strings.Trim(instance.AncestryUUID, ",")
err = detectErrantGTIDs(tabletAlias, instance, tablet)
err = detectErrantGTIDs(instance, tablet)
}

latency.Stop("instance")
Expand Down Expand Up @@ -390,13 +390,18 @@ Cleanup:
}

// detectErrantGTIDs detects the errant GTIDs on an instance.
func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatapb.Tablet) (err error) {
func detectErrantGTIDs(instance *Instance, tablet *topodatapb.Tablet) (err error) {
// If the tablet is not replicating from anyone, then it could be the previous primary.
// We should check for errant GTIDs by finding the difference with the shard's current primary.
if instance.primaryExecutedGtidSet == "" && instance.SourceHost == "" {
var primaryInstance *Instance
primaryAlias, _, _ := ReadShardPrimaryInformation(tablet.Keyspace, tablet.Shard)
if primaryAlias != "" {
// Check if the current tablet is the primary.
// If it is, then we don't need to run errant gtid detection on it.
if primaryAlias == instance.InstanceAlias {
return nil
}
primaryInstance, _, _ = ReadInstance(primaryAlias)
}
// Only run errant GTID detection, if we are sure that the data read of the current primary
Expand Down Expand Up @@ -434,7 +439,7 @@ func detectErrantGTIDs(tabletAlias string, instance *Instance, tablet *topodatap
if err == nil {
var gtidCount int64
gtidCount, err = replication.GTIDCount(instance.GtidErrant)
currentErrantGTIDCount.Set(tabletAlias, gtidCount)
currentErrantGTIDCount.Set(instance.InstanceAlias, gtidCount)
}
}
}
Expand Down
49 changes: 47 additions & 2 deletions go/vt/vtorc/inst/instance_dao_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -854,7 +854,7 @@ func TestDetectErrantGTIDs(t *testing.T) {
primaryTablet := &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone-1",
Uid: 100,
Uid: 101,
},
Keyspace: keyspaceName,
Shard: shardName,
Expand All @@ -881,7 +881,8 @@ func TestDetectErrantGTIDs(t *testing.T) {
require.NoError(t, err)
}

err = detectErrantGTIDs(topoproto.TabletAliasString(tablet.Alias), tt.instance, tablet)
tt.instance.InstanceAlias = topoproto.TabletAliasString(tablet.Alias)
err = detectErrantGTIDs(tt.instance, tablet)
if tt.wantErr {
require.Error(t, err)
return
Expand All @@ -891,3 +892,47 @@ func TestDetectErrantGTIDs(t *testing.T) {
})
}
}

// TestPrimaryErrantGTIDs tests that we don't run Errant GTID detection on the primary tablet itself!
func TestPrimaryErrantGTIDs(t *testing.T) {
// Clear the database after the test. The easiest way to do that is to run all the initialization commands again.
defer func() {
db.ClearVTOrcDatabase()
}()
db.ClearVTOrcDatabase()
keyspaceName := "ks"
shardName := "0"
tablet := &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone-1",
Uid: 100,
},
Keyspace: keyspaceName,
Shard: shardName,
}
instance := &Instance{
SourceHost: "",
ExecutedGtidSet: "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-341",
InstanceAlias: topoproto.TabletAliasString(tablet.Alias),
}

// Save shard record for the primary tablet.
err := SaveShard(topo.NewShardInfo(keyspaceName, shardName, &topodatapb.Shard{
PrimaryAlias: tablet.Alias,
}, nil))
require.NoError(t, err)

// Store the tablet record and the instance.
err = SaveTablet(tablet)
require.NoError(t, err)
err = WriteInstance(instance, true, nil)
require.NoError(t, err)

// After this if we read a new information for the record that updates its
// gtid set further, we shouldn't be detecting errant GTIDs on it since it is the primary!
// We shouldn't be comparing it with a previous version of itself!
instance.ExecutedGtidSet = "230ea8ea-81e3-11e4-972a-e25ec4bd140a:1-10589,8bc65c84-3fe4-11ed-a912-257f0fcdd6c9:1-34,316d193c-70e5-11e5-adb2-ecf4bb2262ff:1-351"
err = detectErrantGTIDs(instance, tablet)
require.NoError(t, err)
require.EqualValues(t, "", instance.GtidErrant)
}

0 comments on commit 16fa7a3

Please sign in to comment.