Skip to content

Commit

Permalink
fix: add a 1m timeout to signal shutdown (#92)
Browse files Browse the repository at this point in the history
- It's possible for a number of reasons for the bootstrap script
  to not exit. This PR adds a hard timeout of 1 minute before
  we forcefully exit.
  • Loading branch information
sreya authored Aug 21, 2024
1 parent f73e3d8 commit 0d37a62
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 15 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ The environment variables can be used to configure various aspects of the inner
| `CODER_CPUS` | Dictates the number of CPUs to allocate the inner container. It is recommended to set this using the Kubernetes [Downward API](https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-container-fields-as-values-for-environment-variables). | false |
| `CODER_MEMORY` | Dictates the max memory (in bytes) to allocate the inner container. It is recommended to set this using the Kubernetes [Downward API](https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-container-fields-as-values-for-environment-variables). | false |
| `CODER_DISABLE_IDMAPPED_MOUNT` | Disables idmapped mounts in sysbox. For more information, see the [Sysbox Documentation](https://github.com/nestybox/sysbox/blob/master/docs/user-guide/configuration.md#disabling-id-mapped-mounts-on-sysbox). | false |
| `CODER_SHUTDOWN_TIMEOUT` | Configure a custom shutdown timeout to wait for the boostrap command to exit. Defaults to 1 minute. | false |

## Coder Template

Expand Down
36 changes: 28 additions & 8 deletions cli/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"sort"
"strconv"
"strings"
"time"

dockertypes "github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/container"
Expand All @@ -39,6 +40,10 @@ const (
// EnvBoxContainerName is the name of the inner user container.
EnvBoxPullImageSecretEnvVar = "CODER_IMAGE_PULL_SECRET" //nolint:gosec
EnvBoxContainerName = "CODER_CVM_CONTAINER_NAME"
// We define a custom exit code to distinguish from the generic '1' when envbox exits due to a shutdown timeout.
// Docker claims exit codes 125-127 so we start at 150 to
// ensure we don't collide.
ExitCodeShutdownTimeout = 150
)

const (
Expand Down Expand Up @@ -77,8 +82,9 @@ const (
// with UID/GID 1000 will be mapped to `UserNamespaceOffset` + 1000
// on the host. Changing this value will result in improper mappings
// on existing containers.
UserNamespaceOffset = 100000
devDir = "/dev"
UserNamespaceOffset = 100000
devDir = "/dev"
defaultShutdownTimeout = time.Minute
)

var (
Expand All @@ -102,6 +108,7 @@ var (
EnvDockerConfig = "CODER_DOCKER_CONFIG"
EnvDebug = "CODER_DEBUG"
EnvDisableIDMappedMount = "CODER_DISABLE_IDMAPPED_MOUNT"
EnvShutdownTimeout = "CODER_SHUTDOWN_TIMEOUT"
)

var envboxPrivateMounts = map[string]struct{}{
Expand Down Expand Up @@ -139,6 +146,7 @@ type flags struct {
cpus int
memory int
disableIDMappedMount bool
shutdownTimeout time.Duration

// Test flags.
noStartupLogs bool
Expand Down Expand Up @@ -348,6 +356,7 @@ func dockerCmd(ch chan func() error) *cobra.Command {
cliflag.IntVarP(cmd.Flags(), &flags.cpus, "cpus", "", EnvCPUs, 0, "Number of CPUs to allocate inner container. e.g. 2")
cliflag.IntVarP(cmd.Flags(), &flags.memory, "memory", "", EnvMemory, 0, "Max memory to allocate to the inner container in bytes.")
cliflag.BoolVarP(cmd.Flags(), &flags.disableIDMappedMount, "disable-idmapped-mount", "", EnvDisableIDMappedMount, false, "Disable idmapped mounts in sysbox. Note that you may need an alternative (e.g. shiftfs).")
cliflag.DurationVarP(cmd.Flags(), &flags.shutdownTimeout, "shutdown-timeout", "", EnvShutdownTimeout, defaultShutdownTimeout, "Duration after which envbox will be forcefully terminated.")

// Test flags.
cliflag.BoolVarP(cmd.Flags(), &flags.noStartupLogs, "no-startup-log", "", "", false, "Do not log startup logs. Useful for testing.")
Expand Down Expand Up @@ -724,27 +733,38 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Docker
return xerrors.Errorf("exec inspect: %w", err)
}

shutdownCh <- func() error {
log.Debug(ctx, "killing container", slog.F("bootstrap_pid", bootstrapPID))
shutdownCh <- killBootstrapCmd(ctx, log, bootstrapPID, bootstrapExec.ID, client, flags.shutdownTimeout)

return nil
}

// KillBootstrapCmd is the command we run when we receive a signal
// to kill the envbox container.
func killBootstrapCmd(ctx context.Context, log slog.Logger, pid int, execID string, client dockerutil.DockerClient, timeout time.Duration) func() error {
return func() error {
log.Debug(ctx, "killing container",
slog.F("bootstrap_pid", pid),
slog.F("timeout", timeout.String()),
)

ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
// The PID returned is the PID _outside_ the container...
//nolint:gosec
out, err := exec.Command("kill", "-TERM", strconv.Itoa(bootstrapPID)).CombinedOutput()
out, err := exec.CommandContext(ctx, "kill", "-TERM", strconv.Itoa(pid)).CombinedOutput()
if err != nil {
return xerrors.Errorf("kill bootstrap process (%s): %w", out, err)
}

log.Debug(ctx, "sent kill signal waiting for process to exit")
err = dockerutil.WaitForExit(ctx, client, bootstrapExec.ID)
err = dockerutil.WaitForExit(ctx, client, execID)
if err != nil {
return xerrors.Errorf("wait for exit: %w", err)
}

log.Debug(ctx, "bootstrap process successfully exited")
return nil
}

return nil
}

//nolint:revive
Expand Down
5 changes: 5 additions & 0 deletions cmd/envbox/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"runtime"
"syscall"

"golang.org/x/xerrors"

"cdr.dev/slog"
"cdr.dev/slog/sloggers/slogjson"
"github.com/coder/envbox/cli"
Expand All @@ -29,6 +31,9 @@ func main() {
err := fn()
if err != nil {
log.Error(ctx, "shutdown function failed", slog.Error(err))
if xerrors.Is(err, context.DeadlineExceeded) {
os.Exit(cli.ExitCodeShutdownTimeout)
}
os.Exit(1)
}
default:
Expand Down
48 changes: 44 additions & 4 deletions integration/docker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,8 @@ func TestDocker(t *testing.T) {
require.Error(t, err)

// Simulate a shutdown.
integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)

err = resource.Close()
require.NoError(t, err)
exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)
require.Equal(t, 0, exitCode)

t.Logf("envbox %q started successfully, recreating...", resource.Container.ID)
// Run the envbox container.
Expand All @@ -326,6 +324,34 @@ func TestDocker(t *testing.T) {
})
require.NoError(t, err)
})

t.Run("ShutdownTimeout", func(t *testing.T) {
t.Parallel()

pool, err := dockertest.NewPool("")
require.NoError(t, err)

var (
tmpdir = integrationtest.TmpDir(t)
binds = integrationtest.DefaultBinds(t, tmpdir)
)

envs := []string{fmt.Sprintf("%s=%s", cli.EnvShutdownTimeout, "1s")}

// Run the envbox container.
resource := integrationtest.RunEnvbox(t, pool, &integrationtest.CreateDockerCVMConfig{
Image: integrationtest.UbuntuImage,
Username: "root",
Envs: envs,
Binds: binds,
BootstrapScript: sigtrapForeverScript,
})

// Simulate a shutdown.
exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)
// We expect it to timeout which should result in a special exit code.
require.Equal(t, cli.ExitCodeShutdownTimeout, exitCode)
})
}

func requireSliceNoContains(t *testing.T, ss []string, els ...string) {
Expand Down Expand Up @@ -358,6 +384,20 @@ func bindMount(src, dest string, ro bool) string {
return fmt.Sprintf("%s:%s", src, dest)
}

const sigtrapForeverScript = `#!/bin/bash
cleanup() {
echo "Got a signal, going to sleep!" && sleep infinity
exit 0
}
trap 'cleanup' INT TERM
while true; do
echo "Working..."
sleep 1
done
`

const sigtrapScript = `#!/bin/bash
cleanup() {
echo "HANDLING A SIGNAL!" && touch /home/coder/foo && echo "touched file"
Expand Down
12 changes: 9 additions & 3 deletions integration/integrationtest/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,12 @@ func RunEnvbox(t *testing.T, pool *dockertest.Pool, conf *CreateDockerCVMConfig)
host.CPUQuota = int64(conf.CPUs) * int64(dockerutil.DefaultCPUPeriod)
})
require.NoError(t, err)
// t.Cleanup(func() { _ = pool.Purge(resource) })
t.Cleanup(func() {
// Only delete the container if the test passes.
if !t.Failed() {
resource.Close()
}
})

waitForCVM(t, pool, resource)

Expand Down Expand Up @@ -264,7 +269,7 @@ func ExecEnvbox(t *testing.T, pool *dockertest.Pool, conf ExecConfig) ([]byte, e
return buf.Bytes(), nil
}

func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) {
func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) int {
t.Helper()

err := pool.Client.KillContainer(docker.KillContainerOptions{
Expand All @@ -283,10 +288,11 @@ func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Durat
continue
}

return
return cnt.State.ExitCode
}

t.Fatalf("timed out waiting for container %s to stop", id)
return 1
}

// cmdLineEnvs returns args passed to the /envbox command
Expand Down

0 comments on commit 0d37a62

Please sign in to comment.