From 48ff85f45c65767d4cdfbc848c3e1eb66b73d04b Mon Sep 17 00:00:00 2001 From: Daniel T <30197399+danwt@users.noreply.github.com> Date: Sun, 28 Apr 2024 11:06:24 +0100 Subject: [PATCH] fix(produce loop): handle unauthenticated error in settlement layer (#726) Co-authored-by: github-actions --- CHANGELOG.md | 7 +- block/manager.go | 6 +- block/submit.go | 67 ++++----- gerr/doc.go | 15 ++ gerr/errors.go | 158 ++++++++++++++++++++++ settlement/dymension/cosmosclient.go | 17 ++- settlement/dymension/cosmosclient_test.go | 40 ++++++ settlement/dymension/dymension.go | 9 +- settlement/settlement.go | 4 +- testutil/block.go | 3 +- 10 files changed, 279 insertions(+), 47 deletions(-) create mode 100644 gerr/doc.go create mode 100644 gerr/errors.go create mode 100644 settlement/dymension/cosmosclient_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d18181af..2b4624001 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,9 @@ -# [](https://github.com/dymensionxyz/dymint/compare/v1.1.0-rc02...v) (2024-04-26) +# [](https://github.com/dymensionxyz/dymint/compare/v1.1.0-rc02...v) (2024-04-27) + + +### Bug Fixes + +* **p2p:** validate block before applying and not before caching in p2p gossiping ([#723](https://github.com/dymensionxyz/dymint/issues/723)) ([98371b5](https://github.com/dymensionxyz/dymint/commit/98371b5220613e70f3274fab5593e02ba532f7db)) diff --git a/block/manager.go b/block/manager.go index 6f7e7b4a7..5b221b8db 100644 --- a/block/manager.go +++ b/block/manager.go @@ -52,10 +52,8 @@ type Manager struct { // Data retrieval Retriever da.BatchRetriever - // Synchronization SyncTargetDiode diodes.Diode - - SyncTarget atomic.Uint64 + SyncTarget atomic.Uint64 // Block production shouldProduceBlocksCh chan bool @@ -83,9 +81,9 @@ type Manager struct { // pendingBatch is the result of the last DA submission // that is pending settlement layer submission. // It is used to avoid double submission of the same batch. + // It's protected by submitBatchMutex. pendingBatch *PendingBatch - // Logging logger types.Logger // Cached blocks and commits for applying at future heights. Invariant: the block and commit are .Valid() (validated sigs etc) diff --git a/block/submit.go b/block/submit.go index 6bc391955..e9042c69c 100644 --- a/block/submit.go +++ b/block/submit.go @@ -2,9 +2,12 @@ package block import ( "context" + "errors" "fmt" "time" + "github.com/dymensionxyz/dymint/gerr" + "github.com/dymensionxyz/dymint/da" "github.com/dymensionxyz/dymint/types" ) @@ -21,63 +24,70 @@ func (m *Manager) SubmitLoop(ctx context.Context) { return // trigger by time case <-ticker.C: - m.HandleSubmissionTrigger(ctx) + err := m.HandleSubmissionTrigger(ctx) + if errors.Is(err, gerr.ErrAborted) { + continue + } + if errors.Is(err, gerr.ErrUnauthenticated) { + panic(fmt.Errorf("handle submission trigger: %w", err)) + } + if err != nil { + m.logger.Error("handle submission trigger", "error", err) + } } } } -// handleSubmissionTrigger processes the submission trigger event. It checks if there are new blocks produced since the last submission. +// HandleSubmissionTrigger processes the submission trigger event. It checks if there are new blocks produced since the last submission. // If there are, it attempts to submit a batch of blocks. It then attempts to produce an empty block to ensure IBC messages // pass through during the batch submission process due to proofs requires for ibc messages only exist on the next block. // Finally, it submits the next batch of blocks and updates the sync target to the height of // the last block in the submitted batch. -func (m *Manager) HandleSubmissionTrigger(ctx context.Context) { - if !m.submitBatchMutex.TryLock() { // Attempt to lock for batch processing - m.logger.Debug("Batch submission already in process, skipping submission") - return +func (m *Manager) HandleSubmissionTrigger(ctx context.Context) error { + if !m.submitBatchMutex.TryLock() { + return fmt.Errorf("batch submission already in process, skipping submission: %w", gerr.ErrAborted) } defer m.submitBatchMutex.Unlock() // Ensure unlocking at the end // Load current sync target and height to determine if new blocks are available for submission. syncTarget, height := m.SyncTarget.Load(), m.Store.Height() if height <= syncTarget { // Check if there are new blocks since last sync target. - return // Exit if no new blocks are produced. + return nil // Exit if no new blocks are produced. } // We try and produce an empty block to make sure relevant ibc messages will pass through during the batch submission: https://github.com/dymensionxyz/research/issues/173. err := m.ProduceAndGossipBlock(ctx, true) if err != nil { - m.logger.Error("produce empty block", "error", err) + m.logger.Error("produce and gossip empty block", "error", err) } - if m.pendingBatch != nil { - m.logger.Info("pending batch exists", "startHeight", m.pendingBatch.batch.StartHeight, "endHeight", m.pendingBatch.batch.EndHeight) - } else { + if m.pendingBatch == nil { nextBatch, err := m.createNextBatch() if err != nil { - m.logger.Error("get next batch", "error", err) - return + return fmt.Errorf("create next batch: %w", err) } resultSubmitToDA, err := m.submitNextBatchToDA(nextBatch) if err != nil { - m.logger.Error("submit next batch", "error", err) - return + return fmt.Errorf("submit next batch to da: %w", err) } m.pendingBatch = &PendingBatch{ daResult: resultSubmitToDA, batch: nextBatch, } + } else { + m.logger.Info("pending batch already exists", "startHeight", m.pendingBatch.batch.StartHeight, "endHeight", m.pendingBatch.batch.EndHeight) } - syncHeight, err := m.submitPendingBatchToSL() + syncHeight, err := m.submitPendingBatchToSL(*m.pendingBatch) if err != nil { - m.logger.Error("submit next batch to SL", "error", err) - return + return fmt.Errorf("submit pending batch to sl: %w", err) } + m.pendingBatch = nil // Update the syncTarget to the height of the last block in the last batch as seen by this node. m.UpdateSyncParams(syncHeight) + return nil } func (m *Manager) createNextBatch() (*types.Batch, error) { @@ -106,7 +116,7 @@ func (m *Manager) submitNextBatchToDA(nextBatch *types.Batch) (*da.ResultSubmitB m.logger.Error("validate last block in batch is empty", "startHeight", startHeight, "endHeight", actualEndHeight, "error", err) return nil, err } - // Verify the last block in the batch is an empty block and that no ibc messages has accidentially passed through. + // Verify the last block in the batch is an empty block and that no ibc messages has accidentally passed through. // This block may not be empty if another block has passed it in line. If that's the case our empty block request will // be sent to the next batch. if !isLastBlockEmpty { @@ -124,23 +134,14 @@ func (m *Manager) submitNextBatchToDA(nextBatch *types.Batch) (*da.ResultSubmitB return &resultSubmitToDA, nil } -func (m *Manager) submitPendingBatchToSL() (uint64, error) { - if m.pendingBatch == nil { - return 0, fmt.Errorf("no pending batch to submit") - } - - // Submit batch to SL - startHeight := m.pendingBatch.batch.StartHeight - actualEndHeight := m.pendingBatch.batch.EndHeight - err := m.SLClient.SubmitBatch(m.pendingBatch.batch, m.DAClient.GetClientType(), m.pendingBatch.daResult) +func (m *Manager) submitPendingBatchToSL(p PendingBatch) (uint64, error) { + startHeight := p.batch.StartHeight + actualEndHeight := p.batch.EndHeight + err := m.SLClient.SubmitBatch(p.batch, m.DAClient.GetClientType(), p.daResult) if err != nil { - m.logger.Error("submit batch to SL", "startHeight", startHeight, "endHeight", actualEndHeight, "error", err) - return 0, err + return 0, fmt.Errorf("sl client submit batch: startheight: %d: actual end height: %d: %w", startHeight, actualEndHeight, err) } - // Clear pending batch - m.pendingBatch = nil - return actualEndHeight, nil } diff --git a/gerr/doc.go b/gerr/doc.go new file mode 100644 index 000000000..d1fd3c663 --- /dev/null +++ b/gerr/doc.go @@ -0,0 +1,15 @@ +// Package gerr provides a systematic way to think about errors. It is based on google API design advice here +// https://cloud.google.com/apis/design/errors +// https://cloud.google.com/apis/design/errors#handling_errors +// https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto +// In particular, it's important to avoid defining additional errors. Rather, all important errors should wrap one of +// the errors defined in this package. +// +// "Google APIs must use the canonical error codes defined by google.rpc.Code. Individual APIs must avoid defining +// additional error codes, since developers are very unlikely to write logic to handle a large number of error codes. +// For reference, handling an average of three error codes per API call would mean most application logic would just +// be for error handling, which would not be a good developer experience." +// +// Note: this package can be extended to automatically return the correct GRPC/HTTP codes too, if needed. +// Note: this package could be lifted out and shared across more dymension code, to help us standardise. +package gerr diff --git a/gerr/errors.go b/gerr/errors.go new file mode 100644 index 000000000..f2d72b7d7 --- /dev/null +++ b/gerr/errors.go @@ -0,0 +1,158 @@ +package gerr + +import "errors" + +/* +See https://github.com/googleapis/googleapis/blob/master/google/rpc/code.proto +See also package doc +*/ + +// ErrCancelled : The operation was cancelled, typically by the caller. +// +// HTTP Mapping: 499 Client Closed Request +var ErrCancelled = errors.New("cancelled") // CANCELLED + +// ErrUnknown : Unknown error. For example, this error may be returned when +// a `Status` value received from another address space belongs to +// an error space that is not known in this address space. Also +// errors raised by APIs that do not return enough error information +// may be converted to this error. +// +// HTTP Mapping: 500 Internal Server Error +var ErrUnknown = errors.New("unknown") // UNKNOWN + +// ErrInvalidArgument : The client specified an invalid argument. Note that this differs +// from `FAILED_PRECONDITION`. `INVALID_ARGUMENT` indicates arguments +// that are problematic regardless of the state of the system +// (e.g., a malformed file name). +// +// HTTP Mapping: 400 Bad Request +var ErrInvalidArgument = errors.New("invalid argument") // INVALID_ARGUMENT + +// ErrDeadlineExceeded : The deadline expired before the operation could complete. For operations +// that change the state of the system, this error may be returned +// even if the operation has completed successfully. For example, a +// successful response from a server could have been delayed long +// enough for the deadline to expire. +// +// HTTP Mapping: 504 Gateway Timeout +var ErrDeadlineExceeded = errors.New("deadline exceeded") // DEADLINE_EXCEEDED + +// ErrNotFound : Some requested entity (e.g., file or directory) was not found. +// +// Note to server developers: if a request is denied for an entire class +// of users, such as gradual feature rollout or undocumented allowlist, +// `NOT_FOUND` may be used. If a request is denied for some users within +// a class of users, such as user-based access control, `PERMISSION_DENIED` +// must be used. +// +// HTTP Mapping: 404 Not Found +var ErrNotFound = errors.New("not found") // NOT_FOUND + +// ErrAlreadyExist : The entity that a client attempted to create (e.g., file or directory) +// already exists. +// +// HTTP Mapping: 409 Conflict +var ErrAlreadyExist = errors.New("already exist") // ALREADY_EXIST + +// ErrPermissionDenied : The caller does not have permission to execute the specified +// operation. `PERMISSION_DENIED` must not be used for rejections +// caused by exhausting some resource (use `RESOURCE_EXHAUSTED` +// instead for those errors). `PERMISSION_DENIED` must not be +// used if the caller can not be identified (use `UNAUTHENTICATED` +// instead for those errors). This error code does not imply the +// request is valid or the requested entity exists or satisfies +// other pre-conditions. +// +// HTTP Mapping: 403 Forbidden +var ErrPermissionDenied = errors.New("permission denied") // PERMISSION_DENIED + +// ErrUnauthenticated : The request does not have valid authentication credentials for the +// operation. +// +// HTTP Mapping: 401 Unauthorized +var ErrUnauthenticated = errors.New("unauthenticated") // UNAUTHENTICATED + +// ErrResourceExhausted : Some resource has been exhausted, perhaps a per-user quota, or +// perhaps the entire file system is out of space. +// +// HTTP Mapping: 429 Too Many Requests +var ErrResourceExhausted = errors.New("resource exhausted") // RESOURCE_EXHAUSTED + +// ErrFailedPrecondition : The operation was rejected because the system is not in a state +// required for the operation's execution. For example, the directory +// to be deleted is non-empty, an rmdir operation is applied to +// a non-directory, etc. +// +// Service implementors can use the following guidelines to decide +// between `FAILED_PRECONDITION`, `ABORTED`, and `UNAVAILABLE`: +// +// (a) Use `UNAVAILABLE` if the client can retry just the failing call. +// (b) Use `ABORTED` if the client should retry at a higher level. For +// example, when a client-specified test-and-set fails, indicating the +// client should restart a read-modify-write sequence. +// (c) Use `FAILED_PRECONDITION` if the client should not retry until +// the system state has been explicitly fixed. For example, if an "rmdir" +// fails because the directory is non-empty, `FAILED_PRECONDITION` +// should be returned since the client should not retry unless +// the files are deleted from the directory. +// +// HTTP Mapping: 400 Bad Request +var ErrFailedPrecondition = errors.New("failed precondition") // FAILED_PRECONDITION + +// ErrAborted : The operation was aborted, typically due to a concurrency issue such as +// a sequencer check failure or transaction abort. +// +// See the guidelines above for deciding between `FAILED_PRECONDITION`, +// `ABORTED`, and `UNAVAILABLE`. +// +// HTTP Mapping: 409 Conflict +var ErrAborted = errors.New("aborted") // ABORTED + +// ErrOutOfRange : The operation was attempted past the valid range. E.g., seeking or +// reading past end-of-file. +// +// Unlike `INVALID_ARGUMENT`, this error indicates a problem that may +// be fixed if the system state changes. For example, a 32-bit file +// system will generate `INVALID_ARGUMENT` if asked to read at an +// offset that is not in the range [0,2^32-1], but it will generate +// `OUT_OF_RANGE` if asked to read from an offset past the current +// file size. +// +// There is a fair bit of overlap between `FAILED_PRECONDITION` and +// `OUT_OF_RANGE`. We recommend using `OUT_OF_RANGE` (the more specific +// error) when it applies so that callers who are iterating through +// a space can easily look for an `OUT_OF_RANGE` error to detect when +// they are done. +// +// HTTP Mapping: 400 Bad Request +var ErrOutOfRange = errors.New("out of range") // OUT_OF_RANGE + +// ErrUnimplemented : The operation is not implemented or is not supported/enabled in this +// service. +// +// HTTP Mapping: 501 Not Implemented +var ErrUnimplemented = errors.New("unimplemented") // UNIMPLEMENTED + +// ErrInternal : Internal errors. This means that some invariants expected by the +// underlying system have been broken. This error code is reserved +// for serious errors. +// +// HTTP Mapping: 500 Internal Server Error +var ErrInternal = errors.New("internal") // INTERNAL + +// ErrUnavailable : The service is currently unavailable. This is most likely a +// transient condition, which can be corrected by retrying with +// a backoff. Note that it is not always safe to retry +// non-idempotent operations. +// +// See the guidelines above for deciding between `FAILED_PRECONDITION`, +// `ABORTED`, and `UNAVAILABLE`. +// +// HTTP Mapping: 503 Service Unavailable +var ErrUnavailable = errors.New("unavailable") // UNAVAILABLE + +// ErrDataLoss : Unrecoverable data loss or corruption. +// +// HTTP Mapping: 500 Internal Server Error +var ErrDataLoss = errors.New("data loss") // DATA_LOSS diff --git a/settlement/dymension/cosmosclient.go b/settlement/dymension/cosmosclient.go index 632b6aac6..1a6f28b54 100644 --- a/settlement/dymension/cosmosclient.go +++ b/settlement/dymension/cosmosclient.go @@ -2,6 +2,11 @@ package dymension import ( "context" + "errors" + "fmt" + "strings" + + "github.com/dymensionxyz/dymint/gerr" sdkclient "github.com/cosmos/cosmos-sdk/client" sdktypes "github.com/cosmos/cosmos-sdk/types" @@ -64,5 +69,15 @@ func (c *cosmosClient) GetSequencerClient() sequencertypes.QueryClient { } func (c *cosmosClient) GetAccount(accountName string) (cosmosaccount.Account, error) { - return c.AccountRegistry.GetByName(accountName) + acc, err := c.AccountRegistry.GetByName(accountName) + if err != nil { + if strings.Contains(err.Error(), "too many failed passphrase attempts") { + return cosmosaccount.Account{}, fmt.Errorf("account registry get by name: %w:%w", gerr.ErrUnauthenticated, err) + } + var accNotExistErr *cosmosaccount.AccountDoesNotExistError + if errors.As(err, &accNotExistErr) { + return cosmosaccount.Account{}, fmt.Errorf("account registry get by name: %w:%w", gerr.ErrNotFound, err) + } + } + return acc, err } diff --git a/settlement/dymension/cosmosclient_test.go b/settlement/dymension/cosmosclient_test.go new file mode 100644 index 000000000..3df486882 --- /dev/null +++ b/settlement/dymension/cosmosclient_test.go @@ -0,0 +1,40 @@ +package dymension + +import ( + "fmt" + "testing" + + "github.com/dymensionxyz/cosmosclient/cosmosclient" + "github.com/ignite/cli/ignite/pkg/cosmosaccount" + "github.com/stretchr/testify/assert" +) + +func Test_cosmosClient_GetAccount(t *testing.T) { + type fields struct { + Client cosmosclient.Client + } + type args struct { + accountName string + } + tests := []struct { + name string + fields fields + args args + want cosmosaccount.Account + wantErr assert.ErrorAssertionFunc + }{ + // TODO: Add test cases. + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &cosmosClient{ + Client: tt.fields.Client, + } + got, err := c.GetAccount(tt.args.accountName) + if !tt.wantErr(t, err, fmt.Sprintf("GetAccount(%v)", tt.args.accountName)) { + return + } + assert.Equalf(t, tt.want, got, "GetAccount(%v)", tt.args.accountName) + }) + } +} diff --git a/settlement/dymension/dymension.go b/settlement/dymension/dymension.go index 67b5528f1..3fc2919a9 100644 --- a/settlement/dymension/dymension.go +++ b/settlement/dymension/dymension.go @@ -205,15 +205,14 @@ func (d *HubClient) Stop() error { func (d *HubClient) PostBatch(batch *types.Batch, daClient da.Client, daResult *da.ResultSubmitBatch) error { msgUpdateState, err := d.convertBatchToMsgUpdateState(batch, daResult) if err != nil { - return err + return fmt.Errorf("convert batch to msg update state: %w", err) } // TODO: probably should be changed to be a channel, as the eventHandler is also in the HubClient in he produces the event postBatchSubscriberClient := fmt.Sprintf("%s-%d-%s", postBatchSubscriberPrefix, batch.StartHeight, uuid.New().String()) subscription, err := d.pubsub.Subscribe(d.ctx, postBatchSubscriberClient, settlement.EventQueryNewSettlementBatchAccepted) if err != nil { - d.logger.Error("subscribe to state update events", "err", err) - return err + return fmt.Errorf("pub sub subscribe to settlement state updates: %w", err) } //nolint:errcheck @@ -439,12 +438,12 @@ func (d *HubClient) eventHandler() { func (d *HubClient) convertBatchToMsgUpdateState(batch *types.Batch, daResult *da.ResultSubmitBatch) (*rollapptypes.MsgUpdateState, error) { account, err := d.client.GetAccount(d.config.DymAccountName) if err != nil { - return nil, err + return nil, fmt.Errorf("get account: %w", err) } addr, err := account.Address(addressPrefix) if err != nil { - return nil, err + return nil, fmt.Errorf("derive address: %w", err) } blockDescriptors := make([]rollapptypes.BlockDescriptor, len(batch.Blocks)) diff --git a/settlement/settlement.go b/settlement/settlement.go index b6b7b21e8..3bb831059 100644 --- a/settlement/settlement.go +++ b/settlement/settlement.go @@ -62,7 +62,7 @@ type LayerI interface { // Stop is called once, after Start. It should stop the client service. Stop() error - // SubmitBatch tries submiting the batch in an async way to the settlement layer. This should create a transaction which (potentially) + // SubmitBatch tries submitting the batch in an async way to the settlement layer. This should create a transaction which (potentially) // triggers a state transition in the settlement layer. Events are emitted on success or failure. SubmitBatch(batch *types.Batch, daClient da.Client, daResult *da.ResultSubmitBatch) error @@ -76,7 +76,7 @@ type LayerI interface { GetProposer() *types.Sequencer } -// HubClient is an helper interface for a more granualr interaction with the hub. +// HubClient is a helper interface for a more granular interaction with the hub. // Implementing a new settlement layer client basically requires embedding the base client // and implementing the helper interfaces. type HubClient interface { diff --git a/testutil/block.go b/testutil/block.go index 35c98df6c..f6187e49e 100644 --- a/testutil/block.go +++ b/testutil/block.go @@ -92,7 +92,8 @@ func GetManagerWithProposerKey(conf config.BlockManagerConfig, proposerKey crypt p2pKey, _, _ := crypto.GenerateEd25519Key(rand.Reader) p2pClient, err := p2p.NewClient(config.P2PConfig{ GossipCacheSize: 50, - BoostrapTime: 30 * time.Second}, p2pKey, "TestChain", logger) + BoostrapTime: 30 * time.Second, + }, p2pKey, "TestChain", logger) if err != nil { return nil, err }