From dab21fc71219b17e4c5e26238c370f820033ce9e Mon Sep 17 00:00:00 2001 From: colin <102356659+colinlyguo@users.noreply.github.com> Date: Thu, 24 Aug 2023 22:59:33 +0800 Subject: [PATCH] refactor(coordinator): omit coordinator's error details to prover & add logs (#863) Co-authored-by: colinlyguo --- common/version/version.go | 2 +- .../internal/controller/api/get_task.go | 4 ++-- .../internal/controller/api/submit_proof.go | 14 +++++++------- .../logic/provertask/batch_prover_task.go | 13 +++++++++---- .../logic/provertask/chunk_prover_task.go | 19 ++++++++++++++----- .../logic/submitproof/proof_receiver.go | 14 ++++++++++---- 6 files changed, 43 insertions(+), 23 deletions(-) diff --git a/common/version/version.go b/common/version/version.go index 6b423c7363..a60658c760 100644 --- a/common/version/version.go +++ b/common/version/version.go @@ -7,7 +7,7 @@ import ( "strings" ) -var tag = "v4.1.103" +var tag = "v4.1.104" var commit = func() string { if info, ok := debug.ReadBuildInfo(); ok { diff --git a/coordinator/internal/controller/api/get_task.go b/coordinator/internal/controller/api/get_task.go index cbaec9ebd7..c6beee4379 100644 --- a/coordinator/internal/controller/api/get_task.go +++ b/coordinator/internal/controller/api/get_task.go @@ -41,7 +41,7 @@ func NewGetTaskController(cfg *config.Config, db *gorm.DB, vf *verifier.Verifier func (ptc *GetTaskController) GetTasks(ctx *gin.Context) { var getTaskParameter coordinatorType.GetTaskParameter if err := ctx.ShouldBind(&getTaskParameter); err != nil { - nerr := fmt.Errorf("prover tasks parameter invalid, err:%w", err) + nerr := fmt.Errorf("prover task parameter invalid, err:%w", err) coordinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) return } @@ -49,7 +49,7 @@ func (ptc *GetTaskController) GetTasks(ctx *gin.Context) { proofType := ptc.proofType(&getTaskParameter) proverTask, isExist := ptc.proverTasks[proofType] if !isExist { - nerr := fmt.Errorf("parameter wrong proof type") + nerr := fmt.Errorf("parameter wrong proof type:%v", proofType) coordinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) return } diff --git a/coordinator/internal/controller/api/submit_proof.go b/coordinator/internal/controller/api/submit_proof.go index cc6398d4a5..815b07f1b0 100644 --- a/coordinator/internal/controller/api/submit_proof.go +++ b/coordinator/internal/controller/api/submit_proof.go @@ -14,7 +14,7 @@ import ( "scroll-tech/coordinator/internal/config" "scroll-tech/coordinator/internal/logic/submitproof" "scroll-tech/coordinator/internal/logic/verifier" - coodinatorType "scroll-tech/coordinator/internal/types" + coordinatorType "scroll-tech/coordinator/internal/types" ) // SubmitProofController the submit proof api controller @@ -31,10 +31,10 @@ func NewSubmitProofController(cfg *config.Config, db *gorm.DB, vf *verifier.Veri // SubmitProof prover submit the proof to coordinator func (spc *SubmitProofController) SubmitProof(ctx *gin.Context) { - var spp coodinatorType.SubmitProofParameter + var spp coordinatorType.SubmitProofParameter if err := ctx.ShouldBind(&spp); err != nil { nerr := fmt.Errorf("parameter invalid, err:%w", err) - coodinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) + coordinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) return } @@ -52,7 +52,7 @@ func (spc *SubmitProofController) SubmitProof(ctx *gin.Context) { var tmpChunkProof message.ChunkProof if err := json.Unmarshal([]byte(spp.Proof), &tmpChunkProof); err != nil { nerr := fmt.Errorf("unmarshal parameter chunk proof invalid, err:%w", err) - coodinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) + coordinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) return } proofMsg.ChunkProof = &tmpChunkProof @@ -60,7 +60,7 @@ func (spc *SubmitProofController) SubmitProof(ctx *gin.Context) { var tmpBatchProof message.BatchProof if err := json.Unmarshal([]byte(spp.Proof), &tmpBatchProof); err != nil { nerr := fmt.Errorf("unmarshal parameter batch proof invalid, err:%w", err) - coodinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) + coordinatorType.RenderJSON(ctx, types.ErrCoordinatorParameterInvalidNo, nerr, nil) return } proofMsg.BatchProof = &tmpBatchProof @@ -69,8 +69,8 @@ func (spc *SubmitProofController) SubmitProof(ctx *gin.Context) { if err := spc.submitProofReceiverLogic.HandleZkProof(ctx, &proofMsg, spp); err != nil { nerr := fmt.Errorf("handle zk proof failure, err:%w", err) - coodinatorType.RenderJSON(ctx, types.ErrCoordinatorHandleZkProofFailure, nerr, nil) + coordinatorType.RenderJSON(ctx, types.ErrCoordinatorHandleZkProofFailure, nerr, nil) return } - coodinatorType.RenderJSON(ctx, types.Success, nil, nil) + coordinatorType.RenderJSON(ctx, types.Success, nil, nil) } diff --git a/coordinator/internal/logic/provertask/batch_prover_task.go b/coordinator/internal/logic/provertask/batch_prover_task.go index 9832015203..bd261d170a 100644 --- a/coordinator/internal/logic/provertask/batch_prover_task.go +++ b/coordinator/internal/logic/provertask/batch_prover_task.go @@ -101,7 +101,8 @@ func (bp *BatchProverTask) Assign(ctx *gin.Context, getTaskParameter *coordinato } if len(batchTasks) != 1 { - return nil, fmt.Errorf("get unassigned batch proving task len not 1, batch tasks:%v", batchTasks) + log.Error("get unassigned batch proving task len not 1", "length", len(batchTasks), "batch tasks", batchTasks) + return nil, ErrCoordinatorInternalFailure } batchTask := batchTasks[0] @@ -109,7 +110,9 @@ func (bp *BatchProverTask) Assign(ctx *gin.Context, getTaskParameter *coordinato if !bp.checkAttemptsExceeded(batchTask.Hash, message.ProofTypeBatch) { bp.batchAttemptsExceedTotal.Inc() - return nil, fmt.Errorf("the batch task id:%s check attempts have reach the maximum", batchTask.Hash) + // TODO: retry fetching unassigned batch proving task + log.Error("batch task proving attempts reach the maximum", "hash", batchTask.Hash) + return nil, nil } proverTask := orm.ProverTask{ @@ -127,13 +130,15 @@ func (bp *BatchProverTask) Assign(ctx *gin.Context, getTaskParameter *coordinato // Store session info. if err = bp.proverTaskOrm.SetProverTask(ctx, &proverTask); err != nil { bp.recoverProvingStatus(ctx, batchTask) - return nil, fmt.Errorf("db set session info fail, session id:%s, error:%w", proverTask.TaskID, err) + log.Error("db set session info fail", "task hash", batchTask.Hash, "prover name", proverName.(string), "prover pubKey", publicKey.(string), "err", err) + return nil, ErrCoordinatorInternalFailure } taskMsg, err := bp.formatProverTask(ctx, batchTask.Hash) if err != nil { bp.recoverProvingStatus(ctx, batchTask) - return nil, fmt.Errorf("format prover failure, id:%s error:%w", batchTask.Hash, err) + log.Error("format prover task failure", "hash", batchTask.Hash, "err", err) + return nil, ErrCoordinatorInternalFailure } bp.batchTaskGetTaskTotal.Inc() diff --git a/coordinator/internal/logic/provertask/chunk_prover_task.go b/coordinator/internal/logic/provertask/chunk_prover_task.go index 9166fa4105..b62411f449 100644 --- a/coordinator/internal/logic/provertask/chunk_prover_task.go +++ b/coordinator/internal/logic/provertask/chunk_prover_task.go @@ -22,6 +22,9 @@ import ( coordinatorType "scroll-tech/coordinator/internal/types" ) +// ErrCoordinatorInternalFailure coordinator internal db failure +var ErrCoordinatorInternalFailure = fmt.Errorf("coordinator internal error") + // ChunkProverTask the chunk prover task type ChunkProverTask struct { BaseProverTask @@ -94,7 +97,8 @@ func (cp *ChunkProverTask) Assign(ctx *gin.Context, getTaskParameter *coordinato // load and send chunk tasks chunkTasks, err := cp.chunkOrm.UpdateUnassignedChunkReturning(ctx, getTaskParameter.ProverHeight, 1) if err != nil { - return nil, fmt.Errorf("failed to get unassigned chunk proving tasks, error:%w", err) + log.Error("failed to get unassigned chunk proving tasks", "height", getTaskParameter.ProverHeight, "err", err) + return nil, ErrCoordinatorInternalFailure } if len(chunkTasks) == 0 { @@ -102,7 +106,8 @@ func (cp *ChunkProverTask) Assign(ctx *gin.Context, getTaskParameter *coordinato } if len(chunkTasks) != 1 { - return nil, fmt.Errorf("get unassigned chunk proving task len not 1, chunk tasks:%v", chunkTasks) + log.Error("get unassigned chunk proving task len not 1", "length", len(chunkTasks), "chunk tasks", chunkTasks) + return nil, ErrCoordinatorInternalFailure } chunkTask := chunkTasks[0] @@ -111,7 +116,9 @@ func (cp *ChunkProverTask) Assign(ctx *gin.Context, getTaskParameter *coordinato if !cp.checkAttemptsExceeded(chunkTask.Hash, message.ProofTypeChunk) { cp.chunkAttemptsExceedTotal.Inc() - return nil, fmt.Errorf("chunk proof hash id:%s check attempts have reach the maximum", chunkTask.Hash) + // TODO: retry fetching unassigned chunk proving task + log.Error("chunk task proving attempts reach the maximum", "hash", chunkTask.Hash) + return nil, nil } proverTask := orm.ProverTask{ @@ -127,13 +134,15 @@ func (cp *ChunkProverTask) Assign(ctx *gin.Context, getTaskParameter *coordinato } if err = cp.proverTaskOrm.SetProverTask(ctx, &proverTask); err != nil { cp.recoverProvingStatus(ctx, chunkTask) - return nil, fmt.Errorf("db set session info fail, session id:%s , public key:%s, err:%w", chunkTask.Hash, publicKey, err) + log.Error("db set session info fail", "task hash", chunkTask.Hash, "prover name", proverName.(string), "prover pubKey", publicKey.(string), "err", err) + return nil, ErrCoordinatorInternalFailure } taskMsg, err := cp.formatProverTask(ctx, chunkTask.Hash) if err != nil { cp.recoverProvingStatus(ctx, chunkTask) - return nil, fmt.Errorf("format prover task failure, id:%s error:%w", chunkTask.Hash, err) + log.Error("format prover task failure", "hash", chunkTask.Hash, "err", err) + return nil, ErrCoordinatorInternalFailure } cp.chunkTaskGetTaskTotal.Inc() diff --git a/coordinator/internal/logic/submitproof/proof_receiver.go b/coordinator/internal/logic/submitproof/proof_receiver.go index f604d31806..23362c680d 100644 --- a/coordinator/internal/logic/submitproof/proof_receiver.go +++ b/coordinator/internal/logic/submitproof/proof_receiver.go @@ -34,6 +34,12 @@ var ( ErrValidatorFailureProofTimeout = errors.New("validator failure submit proof timeout") // ErrValidatorFailureTaskHaveVerifiedSuccess have proved success and verified success ErrValidatorFailureTaskHaveVerifiedSuccess = errors.New("validator failure chunk/batch have proved and verified success") + // ErrValidatorFailureVerifiedFailed failed to verify and the verifier returns error + ErrValidatorFailureVerifiedFailed = fmt.Errorf("verification failed, verifier returns error") + // ErrValidatorSuccessInvalidProof successful verified and the proof is invalid + ErrValidatorSuccessInvalidProof = fmt.Errorf("verification succeeded, it's an invalid proof") + // ErrCoordinatorInternalFailure coordinator internal db failure + ErrCoordinatorInternalFailure = fmt.Errorf("coordinator internal error") ) // ProofReceiverLogic the proof receiver logic @@ -162,10 +168,10 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.P log.Info("proof verified by coordinator failed", "proof id", proofMsg.ID, "prover name", proverTask.ProverName, "prover pk", pk, "prove type", proofMsg.Type, "proof time", proofTimeSec, "error", verifyErr) - if verifyErr == nil { - verifyErr = fmt.Errorf("verification succeeded and it's an invalid proof") + if verifyErr != nil { + return ErrValidatorFailureVerifiedFailed } - return verifyErr + return ErrValidatorSuccessInvalidProof } m.proverTaskProveDuration.Observe(time.Since(proverTask.CreatedAt).Seconds()) @@ -176,7 +182,7 @@ func (m *ProofReceiverLogic) HandleZkProof(ctx *gin.Context, proofMsg *message.P if err := m.closeProofTask(ctx, proofMsg.ID, pk, proofMsg, proofTimeSec); err != nil { m.proofSubmitFailure.Inc() m.proofRecover(ctx, proofMsg.ID, pk, proofMsg) - return err + return ErrCoordinatorInternalFailure } return nil