diff --git a/CHANGELOG.md b/CHANGELOG.md
index 964cfae..85aba31 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,17 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.0.2] - 2020-04-07
+
+### Added
+- Graceful shutdown upon receiving interrupt
+
+### Removed
+- `ctx` from `cluster` object
+
+### Changed
+- Refactored to use `fmt.Errorf(...)` instead of `errors.New(fmt.Sprintf(...))`
+
 ## [1.0.1] - 2020-03-23
 
 ### Added
diff --git a/cluster/cluster.go b/cluster/cluster.go
index 43de63d..80d1717 100644
--- a/cluster/cluster.go
+++ b/cluster/cluster.go
@@ -2,7 +2,6 @@ package cluster
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/service/autoscaling"
@@ -13,7 +12,6 @@ import (
 )
 
 type cluster struct {
-	ctx       context.Context
 	asgName   string
 	ec2       ec2iface.EC2API
 	autoscale autoscalingiface.AutoScalingAPI
@@ -22,14 +20,8 @@ type cluster struct {
 type NodeId string
 
 // New returns a new Cluster object
-func New(
-	ctx context.Context,
-	asgName string,
-	ec2 ec2iface.EC2API,
-	asg autoscalingiface.AutoScalingAPI,
-) Cluster {
+func New(asgName string, ec2 ec2iface.EC2API, asg autoscalingiface.AutoScalingAPI) Cluster {
 	return cluster{
-		ctx:       ctx,
 		ec2:       ec2,
 		autoscale: asg,
 		asgName:   asgName,
@@ -57,9 +49,7 @@ func (c cluster) Add(ctx context.Context, count int) error {
 		},
 	)
 	if err != nil {
-		return errors.New(
-			fmt.Sprintf("failed to update autoscale group desired capacity: %v", err),
-		)
+		return fmt.Errorf("failed to update autoscale group desired capacity: %v", err)
 	}
 	return nil
 }
@@ -135,9 +125,7 @@ func (c cluster) describeSelfAsg(ctx context.Context) (*autoscaling.Group, error
 		},
 	)
 	if err != nil {
-		return nil, errors.New(
-			fmt.Sprintf("failed to fetch info about agent autoscale group: %v", err),
-		)
+		return nil, fmt.Errorf("failed to fetch info about agent autoscale group: %v", err)
 	}
 	return response.AutoScalingGroups[0], nil
 }
diff --git a/cluster/interface.go b/cluster/interface.go
index f184f8a..026b8a4 100644
--- a/cluster/interface.go
+++ b/cluster/interface.go
@@ -10,19 +10,19 @@ import (
 type Cluster interface {
 	// Add upscales the cluster by adding the given number of instances
 	// to the autoscaling group
-	Add(ctx context.Context, count int) error
+	Add(context.Context, int) error
 
 	// Destroy downscales the cluster by nuking the EC2 instances whose IDs
 	// are given
-	Destroy(ctx context.Context, agents []NodeId) error
+	Destroy(context.Context, []NodeId) error
 
 	// List returns IDs of running drone agent nodes
-	List(ctx context.Context) ([]NodeId, error)
+	List(context.Context) ([]NodeId, error)
 
 	// Describe returns information about agents whose IDs are given
-	Describe(ctx context.Context, ids []NodeId) ([]*ec2.Instance, error)
+	Describe(context.Context, []NodeId) ([]*ec2.Instance, error)
 
 	// ScalingActivityInProgress returns true if number of instances in
 	// cluster ASG is not the same as its desired capacity
-	ScalingActivityInProgress(ctx context.Context) (bool, error)
+	ScalingActivityInProgress(context.Context) (bool, error)
 }
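A note on the `errors.New(fmt.Sprintf(...))` → `fmt.Errorf(...)` refactor above: with the `%v` verb the two forms produce identical errors, so behaviour is unchanged. A minimal sketch, not part of this diff: on Go 1.13+ the `%w` verb would additionally keep the cause inspectable via `errors.Is`/`errors.As` (the sentinel below is hypothetical, standing in for an AWS SDK error):

```go
package main

import (
	"errors"
	"fmt"
)

// errThrottled is a hypothetical sentinel standing in for an AWS SDK error.
var errThrottled = errors.New("throttled")

func updateCapacity() error {
	// %w (Go 1.13+) preserves the error chain; %v, as used in this diff,
	// flattens the cause into a plain string.
	return fmt.Errorf("failed to update autoscale group desired capacity: %w", errThrottled)
}

func main() {
	err := updateCapacity()
	fmt.Println(errors.Is(err, errThrottled)) // true with %w, false with %v
}
```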
diff --git a/cmd/main.go b/cmd/main.go
index 37c43ed..ae0abca 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -13,12 +13,21 @@ import (
 	"golang.org/x/oauth2"
 	"net/url"
 	"os"
+	"os/signal"
 )
 
-const Version = "1.0.1"
+const Version = "1.0.2"
 
 func main() {
-	ctx := context.Background()
+	ctx, cancel := context.WithCancel(context.Background())
+
+	signalCh := make(chan os.Signal, 1)
+	signal.Notify(signalCh, os.Interrupt)
+	go func() {
+		<-signalCh
+		cancel()
+	}()
+
 	conf, err := config.Load()
 	if err != nil {
 		panic(err)
@@ -26,7 +35,7 @@
 	setupLogging(conf)
 
 	client := setupDroneClient(ctx, conf)
-	fleet := setupAgentClusterClient(ctx, conf)
+	fleet := setupAgentClusterClient(conf)
 
 	log.
 		WithField("version", Version).
@@ -62,10 +71,9 @@ func setupDroneClient(ctx context.Context, c config.Config) drone.Client {
 	return drone.NewClient(uri.String(), authenticator)
 }
 
-func setupAgentClusterClient(ctx context.Context, c config.Config) cluster.Cluster {
+func setupAgentClusterClient(c config.Config) cluster.Cluster {
 	sess := session.Must(session.NewSession())
 	return cluster.New(
-		ctx,
 		c.Agent.AutoscalingGroup,
 		ec2.New(sess),
 		autoscaling.New(sess),
diff --git a/engine/engine.go b/engine/engine.go
index 9e6d593..7da643a 100644
--- a/engine/engine.go
+++ b/engine/engine.go
@@ -57,6 +57,7 @@ func (e *Engine) Start(ctx context.Context) {
 	for {
 		select {
 		case <-ctx.Done():
+			log.Infoln("Shutting down gracefully")
 			return
 		case <-time.After(e.probeInterval):
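The channel-plus-goroutine block added to `main` is the standard pre-Go-1.16 shutdown pattern. A sketch assuming Go 1.16+, where `signal.NotifyContext` collapses it into one call; listening for `syscall.SIGTERM` as well (an assumption, not in this diff) also covers container runtimes that send SIGTERM rather than SIGINT:

```go
package main

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	// NotifyContext (Go 1.16+) cancels ctx on the listed signals, replacing
	// the manual signalCh + goroutine + cancel() wiring shown in the diff.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	<-ctx.Done() // blocks until SIGINT or SIGTERM arrives
	fmt.Println("Shutting down gracefully")
}
```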
%d", maxCountPerAgent) } res := math.Ceil(float64(buildCount) / float64(maxCountPerAgent)) return int(res), nil diff --git a/engine/plan_test.go b/engine/plan_test.go index 201b035..56d5771 100644 --- a/engine/plan_test.go +++ b/engine/plan_test.go @@ -34,7 +34,7 @@ func TestPlan_ScalingInProgress(t *testing.T) { }, }, nil) - c := cluster.New(context.TODO(), "test-asg", nil, asg) + c := cluster.New("test-asg", nil, asg) e := &Engine{ drone: &droneConfig{ agent: &droneAgentConfig{cluster: c}, @@ -106,7 +106,7 @@ func TestPlan_BelowMinRetirement(t *testing.T) { {Status: drone.StatusRunning, Machine: "i-001"}, }, nil) - c := cluster.New(context.TODO(), "test-asg", ec2Client, asg) + c := cluster.New("test-asg", ec2Client, asg) e := &Engine{ drone: &droneConfig{ build: &droneBuildConfig{ @@ -180,7 +180,7 @@ func TestPlan_MinAgentCount(t *testing.T) { Queue(). Return([]*drone.Stage{}, nil) - c := cluster.New(context.TODO(), "test-asg", ec2Client, asg) + c := cluster.New("test-asg", ec2Client, asg) e := &Engine{ drone: &droneConfig{ build: &droneBuildConfig{ @@ -240,7 +240,7 @@ func TestPlan_NoExtra(t *testing.T) { {Status: drone.StatusRunning}, }, nil) - c := cluster.New(context.TODO(), "test-asg", nil, asg) + c := cluster.New("test-asg", nil, asg) e := &Engine{ drone: &droneConfig{ client: droneClient, @@ -299,7 +299,7 @@ func TestPlan_NoneIdle(t *testing.T) { {Status: drone.StatusRunning, Machine: "i-002"}, }, nil) - c := cluster.New(context.TODO(), "test-asg", nil, asg) + c := cluster.New("test-asg", nil, asg) e := &Engine{ drone: &droneConfig{ client: droneClient, @@ -345,7 +345,7 @@ func TestPlan_BelowMinCount(t *testing.T) { }, nil). Times(2) - c := cluster.New(context.TODO(), "test-asg", nil, asg) + c := cluster.New("test-asg", nil, asg) e := &Engine{ drone: &droneConfig{ agent: &droneAgentConfig{cluster: c, minCount: 3}, @@ -418,7 +418,7 @@ func TestPlan_PendingBuilds(t *testing.T) { nil, ) - c := cluster.New(context.TODO(), "test-asg", nil, asg) + c := cluster.New("test-asg", nil, asg) e := &Engine{ drone: &droneConfig{ client: droneClient, @@ -491,7 +491,7 @@ func TestPlan_ExtraDestroyable(t *testing.T) { Queue(). Return([]*drone.Stage{}, nil) - c := cluster.New(context.TODO(), "test-asg", ec2Client, asg) + c := cluster.New("test-asg", ec2Client, asg) e := &Engine{ drone: &droneConfig{ build: &droneBuildConfig{ diff --git a/engine/scale.go b/engine/scale.go index 5de6a2f..919494e 100644 --- a/engine/scale.go +++ b/engine/scale.go @@ -2,7 +2,6 @@ package engine import ( "context" - "errors" "fmt" "github.com/Shuttl-Tech/drone-autoscaler/cluster" log "github.com/sirupsen/logrus" @@ -21,9 +20,7 @@ func (e *Engine) Upscale(ctx context.Context, count int) error { func (e *Engine) Downscale(ctx context.Context, agents []cluster.NodeId) error { log.Infoln("Pausing build queue to destroy agents") if err := e.drone.client.QueuePause(); err != nil { - return errors.New( - fmt.Sprintf("couldn't pause drone queue while downscaling: %v", err), - ) + return fmt.Errorf("couldn't pause drone queue while downscaling: %v", err) } defer e.resumeBuildQueue() log. @@ -40,8 +37,6 @@ func (e *Engine) resumeBuildQueue() { // stuck if the queue was previously paused, so the app must fail // immediately and queue must be resumed manually before re-starting it. 
diff --git a/engine/scale.go b/engine/scale.go
index 5de6a2f..919494e 100644
--- a/engine/scale.go
+++ b/engine/scale.go
@@ -2,7 +2,6 @@ package engine
 
 import (
 	"context"
-	"errors"
 	"fmt"
 	"github.com/Shuttl-Tech/drone-autoscaler/cluster"
 	log "github.com/sirupsen/logrus"
@@ -21,9 +20,7 @@ func (e *Engine) Upscale(ctx context.Context, count int) error {
 func (e *Engine) Downscale(ctx context.Context, agents []cluster.NodeId) error {
 	log.Infoln("Pausing build queue to destroy agents")
 	if err := e.drone.client.QueuePause(); err != nil {
-		return errors.New(
-			fmt.Sprintf("couldn't pause drone queue while downscaling: %v", err),
-		)
+		return fmt.Errorf("couldn't pause drone queue while downscaling: %v", err)
 	}
 	defer e.resumeBuildQueue()
 	log.
@@ -40,8 +37,6 @@ func (e *Engine) resumeBuildQueue() {
 	// stuck if the queue was previously paused, so the app must fail
 	// immediately and queue must be resumed manually before re-starting it.
 	if err := e.drone.client.QueueResume(); err != nil {
-		panic(
-			errors.New(fmt.Sprintf("failed to resume build queue: %v", err)),
-		)
+		panic(fmt.Errorf("failed to resume build queue: %v", err))
 	}
 }
diff --git a/engine/scale_test.go b/engine/scale_test.go
index b5b0216..da198cc 100644
--- a/engine/scale_test.go
+++ b/engine/scale_test.go
@@ -32,7 +32,7 @@ func TestScale_Upscale(t *testing.T) {
 		Return(nil, nil).
 		After(describe)
 
-	c := cluster.New(context.TODO(), "test-asg", nil, asg)
+	c := cluster.New("test-asg", nil, asg)
 	e := &Engine{
 		drone: &droneConfig{
 			agent: &droneAgentConfig{cluster: c},
@@ -69,7 +69,7 @@ func TestScale_Downscale(t *testing.T) {
 
 	droneClient.EXPECT().QueueResume().Return(nil).After(downscale)
 
-	c := cluster.New(context.TODO(), "test-asg", nil, asg)
+	c := cluster.New("test-asg", nil, asg)
 	e := &Engine{
 		drone: &droneConfig{
 			client: droneClient,
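`Downscale` in engine/scale.go pauses the Drone build queue before destroying agents and relies on a deferred `resumeBuildQueue`, which panics if the resume fails. A minimal sketch of that pause/defer-resume shape, with hypothetical `pause`/`resume` helpers standing in for `QueuePause`/`QueueResume`:

```go
package main

import "fmt"

// pause and resume are hypothetical stand-ins for the Drone client's
// QueuePause and QueueResume calls used in engine/scale.go.
func pause() error  { return nil }
func resume() error { return nil }

func downscale() error {
	if err := pause(); err != nil {
		return fmt.Errorf("couldn't pause drone queue while downscaling: %v", err)
	}
	// The deferred resume runs whether or not destruction succeeds; as in
	// scale.go, a failed resume would leave builds stuck, hence the panic.
	defer func() {
		if err := resume(); err != nil {
			panic(fmt.Errorf("failed to resume build queue: %v", err))
		}
	}()
	fmt.Println("destroying agents...")
	return nil
}

func main() {
	if err := downscale(); err != nil {
		fmt.Println(err)
	}
}
```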